Improve isHexaLiteral to work with escaped newlines and trigraphs.
Patch by Francois Pichet!
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@112602 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 98277a4..917829b 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -921,13 +921,14 @@
}
/// isHexaLiteral - Return true if Start points to a hex constant.
-/// FIXME: This isn't correct, it will mislex:
-/// 0\ <- escaped newline.
-/// x1234e+1
/// in microsoft mode (where this is supposed to be several different tokens).
-static inline bool isHexaLiteral(const char *Start, const char *End) {
- return ((End - Start > 2) && Start[0] == '0' &&
- (Start[1] == 'x' || Start[1] == 'X'));
+static bool isHexaLiteral(const char *Start, const LangOptions &Features) {
+ unsigned Size;
+ char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, Features);
+ if (C1 != '0')
+ return false;
+ char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, Features);
+ return (C2 == 'x' || C2 == 'X');
}
/// LexNumericConstant - Lex the remainder of a integer or floating point
@@ -947,7 +948,7 @@
if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) {
// If we are in Microsoft mode, don't continue if the constant is hex.
// For example, MSVC will accept the following as 3 tokens: 0x1234567e+1
- if (!Features.Microsoft || !isHexaLiteral(BufferPtr, CurPtr))
+ if (!Features.Microsoft || !isHexaLiteral(BufferPtr, Features))
return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
}
diff --git a/test/Lexer/ms-extensions.c b/test/Lexer/ms-extensions.c
index 5b45ab4..8b7d2e1 100644
--- a/test/Lexer/ms-extensions.c
+++ b/test/Lexer/ms-extensions.c
@@ -30,4 +30,12 @@
int var2 = 0X1111111e+1;
int var3 = 0xe+1;
int var4 = 0XE+1;
+
+ int var5= 0\
+x1234e+1;
+
+ int var6=
+ /*expected-warning {{backslash and newline separated by space}} */ 0\
+x1234e+1;
}
+