Skip Unicode character expansion in assembly files
Summary: When the C preprocessor is run on assembly files, either because of a
capital `S` file extension or because of `-xassembler-with-cpp`, the lexer no
longer interprets `\u` as the start of a Unicode escape sequence (UCN). In
assembly files the `\u` pattern can instead be used for expanding a macro
argument whose name starts with `u`, so it must not be consumed as a UCN.
Author: Salman Arif <salman.arif@arm.com>
Reviewers: rengolin, olista01
Reviewed By: olista01
Subscribers: cfe-commits
Differential Revision: https://reviews.llvm.org/D31765
llvm-svn: 299754
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 6025a66..4c05193 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
// UCNs (C99 6.4.3, C++11 [lex.charset]p2)
case '\\':
- if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
- if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
- if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
- return true; // KeepWhitespaceMode
+ if (!LangOpts.AsmPreprocessor) {
+ if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+ if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+ if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+ return true; // KeepWhitespaceMode
- // We only saw whitespace, so just try again with this lexer.
- // (We manually eliminate the tail call to avoid recursion.)
- goto LexNextToken;
+ // We only saw whitespace, so just try again with this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+ }
+
+ return LexUnicode(Result, CodePoint, CurPtr);
}
-
- return LexUnicode(Result, CodePoint, CurPtr);
}
Kind = tok::unknown;