Introduce optional "Invalid" parameters to routines that invoke the SourceManager's getBuffer() and, therefore, could fail, along with Preprocessor::getSpelling(). Use the Invalid parameters in the literal parsers (string, floating point, integral, character) to make them robust against errors that stem from, e.g., PCH files that are not consistent with the underlying file system. I still need to audit every caller of these routines to determine which ones need specific handling of error conditions. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@98608 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 50f6af7a6d6951a63f3da7d4c5a7d3965bf73b63 [log] [tgz]
author: Douglas Gregor <dgregor@apple.com> Tue Mar 16 05:20:39 2010 +0000
committer: Douglas Gregor <dgregor@apple.com> Tue Mar 16 05:20:39 2010 +0000
tree: 28f78b0fe61c0b7a80cf3ccf0d1c39a884986120
parent: 36c35ba0aca641e60e5dbee8efbc620c08b9bd61 [diff]
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 004e675..1cfa0e3 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp

@@ -806,7 +806,14 @@
     // Get the spelling of the token, which eliminates trigraphs, etc.  We know
     // that ThisTokBuf points to a buffer that is big enough for the whole token
     // and 'spelled' tokens can only shrink.
-    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    bool StringInvalid = false;
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf, 
+                                         &StringInvalid);
+    if (StringInvalid) {
+      hadError = 1;
+      continue;
+    }
+
     const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
 
     // TODO: Input character set mapping support.
@@ -904,8 +911,12 @@
   llvm::SmallString<16> SpellingBuffer;
   SpellingBuffer.resize(Tok.getLength());
 
+  bool StringInvalid = false;
   const char *SpellingPtr = &SpellingBuffer[0];
-  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
+  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr, &StringInvalid);
+  if (StringInvalid) {
+    return 0;
+  }
 
   assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
 

diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index ede129e..756ce27 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp

@@ -170,7 +170,12 @@
     return true;
   case tok::numeric_constant: {
     llvm::SmallString<64> IntegerBuffer;
-    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer);
+    bool NumberInvalid = false;
+    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, 
+                                              &NumberInvalid);
+    if (NumberInvalid)
+      return true; // a diagnostic was already reported
+
     NumericLiteralParser Literal(Spelling.begin(), Spelling.end(),
                                  PeekTok.getLocation(), PP);
     if (Literal.hadError)
@@ -216,7 +221,10 @@
   }
   case tok::char_constant: {   // 'x'
     llvm::SmallString<32> CharBuffer;
-    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer);
+    bool CharInvalid = false;
+    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
+    if (CharInvalid)
+      return true;
 
     CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
                               PeekTok.getLocation(), PP);

diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index a6efe7f..5584b18 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp

@@ -282,11 +282,19 @@
 /// UCNs, etc.
 std::string Preprocessor::getSpelling(const Token &Tok,
                                       const SourceManager &SourceMgr,
-                                      const LangOptions &Features) {
+                                      const LangOptions &Features, 
+                                      bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token contains nothing interesting, return it directly.
-  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  bool CharDataInvalid = false;
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
+                                                    &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  if (CharDataInvalid)
+    return std::string();
+
   if (!Tok.needsCleaning())
     return std::string(TokStart, TokStart+Tok.getLength());
 
@@ -310,8 +318,8 @@
 /// after trigraph expansion and escaped-newline folding.  In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok) const {
-  return getSpelling(Tok, SourceMgr, Features);
+std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
+  return getSpelling(Tok, SourceMgr, Features, Invalid);
 }
 
 /// getSpelling - This method is used to get the spelling of a token into a
@@ -325,7 +333,7 @@
 /// copy).  The caller is not allowed to modify the returned buffer pointer
 /// if an internal buffer is returned.
 unsigned Preprocessor::getSpelling(const Token &Tok,
-                                   const char *&Buffer) const {
+                                   const char *&Buffer, bool *Invalid) const {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token is an identifier, just return the string from the identifier
@@ -341,8 +349,16 @@
   if (Tok.isLiteral())
     TokStart = Tok.getLiteralData();
 
-  if (TokStart == 0)
-    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  if (TokStart == 0) {
+    bool CharDataInvalid = false;
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+    if (Invalid)
+      *Invalid = CharDataInvalid;
+    if (CharDataInvalid) {
+      Buffer = "";
+      return 0;
+    }
+  }
 
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning()) {
@@ -368,7 +384,8 @@
 /// SmallVector. Note that the returned StringRef may not point to the
 /// supplied buffer if a copy can be avoided.
 llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
-                                    llvm::SmallVectorImpl<char> &Buffer) const {
+                                          llvm::SmallVectorImpl<char> &Buffer,
+                                          bool *Invalid) const {
   // Try the fast path.
   if (const IdentifierInfo *II = Tok.getIdentifierInfo())
     return II->getName();
@@ -378,7 +395,7 @@
     Buffer.resize(Tok.getLength());
 
   const char *Ptr = Buffer.data();
-  unsigned Len = getSpelling(Tok, Ptr);
+  unsigned Len = getSpelling(Tok, Ptr, Invalid);
   return llvm::StringRef(Ptr, Len);
 }
commit	50f6af7a6d6951a63f3da7d4c5a7d3965bf73b63	[log] [tgz]
author	Douglas Gregor <dgregor@apple.com>	Tue Mar 16 05:20:39 2010 +0000
committer	Douglas Gregor <dgregor@apple.com>	Tue Mar 16 05:20:39 2010 +0000
tree	28f78b0fe61c0b7a80cf3ccf0d1c39a884986120
parent	36c35ba0aca641e60e5dbee8efbc620c08b9bd61 [diff]