Use the full spelling of a string literal token so that trigraphs
and escaped newlines don't throw off the offset computation.
On this testcase:
printf("abc\
def"
"%*d", (unsigned) 1, 1);
Before:
t.m:5:5: warning: field width should have type 'int', but argument has type 'unsigned int'
def"
^
After:
t.m:6:12: warning: field width should have type 'int', but argument has type 'unsigned int'
"%*d", (unsigned) 1, 1);
^ ~~~~~~~~~~~~
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@64930 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index f469684..6ffca1b 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -31,6 +31,8 @@
unsigned ByteNo) const {
assert(!SL->isWide() && "This doesn't work for wide strings yet");
+ llvm::SmallString<32> SpellingBuffer;
+
// Loop over all of the tokens in this string until we find the one that
// contains the byte we're looking for.
unsigned TokNo = 0;
@@ -61,8 +63,13 @@
Token TheTok;
TheLexer.LexFromRawLexer(TheTok);
+ // Get the spelling of the token to remove trigraphs and escaped newlines.
+ SpellingBuffer.resize(TheTok.getLength());
+ const char *SpellingPtr = &SpellingBuffer[0];
+ unsigned TokLen = PP.getSpelling(TheTok, SpellingPtr);
+
// The length of the string is the token length minus the two quotes.
- unsigned TokNumBytes = TheTok.getLength()-2;
+ unsigned TokNumBytes = TokLen-2;
// If we found the token we're looking for, return the location.
// FIXME: This should consider character escapes!