clang-tidy: Add check modernize-raw-string-literal

llvm-svn: 264539
diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
index f4a192a..b43b2a9 100644
--- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
@@ -7,6 +7,7 @@
   MakeUniqueCheck.cpp
   ModernizeTidyModule.cpp
   PassByValueCheck.cpp
+  RawStringLiteralCheck.cpp
   RedundantVoidArgCheck.cpp
   ReplaceAutoPtrCheck.cpp
   ShrinkToFitCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
index 7b0db71..d7bda45 100644
--- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
@@ -14,6 +14,7 @@
 #include "LoopConvertCheck.h"
 #include "MakeUniqueCheck.h"
 #include "PassByValueCheck.h"
+#include "RawStringLiteralCheck.h"
 #include "RedundantVoidArgCheck.h"
 #include "ReplaceAutoPtrCheck.h"
 #include "ShrinkToFitCheck.h"
@@ -36,6 +37,8 @@
     CheckFactories.registerCheck<LoopConvertCheck>("modernize-loop-convert");
     CheckFactories.registerCheck<MakeUniqueCheck>("modernize-make-unique");
     CheckFactories.registerCheck<PassByValueCheck>("modernize-pass-by-value");
+    CheckFactories.registerCheck<RawStringLiteralCheck>(
+        "modernize-raw-string-literal");
     CheckFactories.registerCheck<RedundantVoidArgCheck>(
         "modernize-redundant-void-arg");
     CheckFactories.registerCheck<ReplaceAutoPtrCheck>(
diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp
new file mode 100644
index 0000000..b9641c3
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp
@@ -0,0 +1,140 @@
+//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RawStringLiteralCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace modernize {
+
+namespace {
+
+bool containsEscapes(StringRef HayStack, StringRef Escapes) {
+  size_t BackSlash = HayStack.find('\\');
+  if (BackSlash == StringRef::npos)
+    return false;
+
+  while (BackSlash != StringRef::npos) {
+    if (Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos)
+      return false;
+    BackSlash = HayStack.find('\\', BackSlash + 2);
+  }
+
+  return true;
+}
+
+bool isRawStringLiteral(StringRef Text) {
+  // Already a raw string literal if R comes before ".
+  const size_t QuotePos = Text.find('"');
+  assert(QuotePos != StringRef::npos);
+  return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
+}
+
+bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
+                               const StringLiteral *Literal) {
+  // FIXME: Handle L"", u8"", u"" and U"" literals.
+  if (!Literal->isAscii())
+    return false;
+
+  StringRef Bytes = Literal->getBytes();
+  // Non-printing characters disqualify this literal:
+  // \007 = \a bell
+  // \010 = \b backspace
+  // \011 = \t horizontal tab
+  // \012 = \n new line
+  // \013 = \v vertical tab
+  // \014 = \f form feed
+  // \015 = \r carriage return
+  // \177 = delete
+  if (Bytes.find_first_of(StringRef("\000\001\002\003\004\005\006\a"
+                                    "\b\t\n\v\f\r\016\017"
+                                    "\020\021\022\023\024\025\026\027"
+                                    "\030\031\032\033\034\035\036\037"
+                                    "\177",
+                                    33)) != StringRef::npos)
+    return false;
+
+  CharSourceRange CharRange = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(Literal->getSourceRange()),
+      *Result.SourceManager, Result.Context->getLangOpts());
+  StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
+                                        Result.Context->getLangOpts());
+  if (isRawStringLiteral(Text))
+    return false;
+
+  return containsEscapes(Text, R"('\"?x01)");
+}
+
+bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
+  return Bytes.find(Delimiter.empty()
+                        ? std::string(R"lit()")lit")
+                        : (")" + Delimiter + R"(")")) != StringRef::npos;
+}
+
+std::string asRawStringLiteral(const StringLiteral *Literal,
+                               const std::string &DelimiterStem) {
+  const StringRef Bytes = Literal->getBytes();
+  std::string Delimiter;
+  for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
+    Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
+  }
+
+  if (Delimiter.empty())
+    return (R"(R"()" + Bytes + R"lit()")lit").str();
+
+  return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str();
+}
+
+} // namespace
+
+RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
+                                             ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      DelimiterStem(Options.get("DelimiterStem", "lit")) {}
+
+void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Options) {
+  ClangTidyCheck::storeOptions(Options);
+}
+
+void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
+  Finder->addMatcher(stringLiteral().bind("lit"), this);
+}
+
+void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
+  // Raw string literals require C++11 or later.
+  if (!Result.Context->getLangOpts().CPlusPlus11)
+    return;
+
+  const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
+  if (Literal->getLocStart().isMacroID())
+    return;
+
+  if (containsEscapedCharacters(Result, Literal))
+    replaceWithRawStringLiteral(Result, Literal);
+}
+
+void RawStringLiteralCheck::replaceWithRawStringLiteral(
+    const MatchFinder::MatchResult &Result, const StringLiteral *Literal) {
+  CharSourceRange CharRange = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(Literal->getSourceRange()),
+      *Result.SourceManager, Result.Context->getLangOpts());
+  diag(Literal->getLocStart(),
+       "escaped string literal can be written as a raw string literal")
+      << FixItHint::CreateReplacement(
+          CharRange, asRawStringLiteral(Literal, DelimiterStem));
+}
+
+} // namespace modernize
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h
new file mode 100644
index 0000000..aabd435
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h
@@ -0,0 +1,45 @@
+//===--- RawStringLiteralCheck.h - clang-tidy--------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
+
+#include "../ClangTidy.h"
+//#include <string>
+
+namespace clang {
+namespace tidy {
+namespace modernize {
+
+/// This check replaces string literals with escaped characters to
+/// raw string literals.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/modernize-raw-string-literal.html
+class RawStringLiteralCheck : public ClangTidyCheck {
+public:
+  RawStringLiteralCheck(StringRef Name, ClangTidyContext *Context);
+
+  void storeOptions(ClangTidyOptions::OptionMap &Options) override;
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+private:
+  void replaceWithRawStringLiteral(
+      const ast_matchers::MatchFinder::MatchResult &Result,
+      const StringLiteral *Literal);
+
+  std::string DelimiterStem;
+};
+
+} // namespace modernize
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H