blob: f6eed70394223a75e71085d117f6da784949d2bc [file] [log] [blame]
Richard Thomson8930aab2016-03-27 16:43:44 +00001//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "RawStringLiteralCheck.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/ASTMatchers/ASTMatchFinder.h"
13#include "clang/Lex/Lexer.h"
14
15using namespace clang::ast_matchers;
16
17namespace clang {
18namespace tidy {
19namespace modernize {
20
21namespace {
22
23bool containsEscapes(StringRef HayStack, StringRef Escapes) {
24 size_t BackSlash = HayStack.find('\\');
25 if (BackSlash == StringRef::npos)
26 return false;
27
28 while (BackSlash != StringRef::npos) {
29 if (Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos)
30 return false;
31 BackSlash = HayStack.find('\\', BackSlash + 2);
32 }
33
34 return true;
35}
36
37bool isRawStringLiteral(StringRef Text) {
38 // Already a raw string literal if R comes before ".
39 const size_t QuotePos = Text.find('"');
40 assert(QuotePos != StringRef::npos);
41 return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
42}
43
44bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
45 const StringLiteral *Literal) {
46 // FIXME: Handle L"", u8"", u"" and U"" literals.
47 if (!Literal->isAscii())
48 return false;
49
50 StringRef Bytes = Literal->getBytes();
51 // Non-printing characters disqualify this literal:
52 // \007 = \a bell
53 // \010 = \b backspace
54 // \011 = \t horizontal tab
55 // \012 = \n new line
56 // \013 = \v vertical tab
57 // \014 = \f form feed
58 // \015 = \r carriage return
59 // \177 = delete
60 if (Bytes.find_first_of(StringRef("\000\001\002\003\004\005\006\a"
61 "\b\t\n\v\f\r\016\017"
62 "\020\021\022\023\024\025\026\027"
63 "\030\031\032\033\034\035\036\037"
64 "\177",
65 33)) != StringRef::npos)
66 return false;
67
68 CharSourceRange CharRange = Lexer::makeFileCharRange(
69 CharSourceRange::getTokenRange(Literal->getSourceRange()),
70 *Result.SourceManager, Result.Context->getLangOpts());
71 StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
72 Result.Context->getLangOpts());
73 if (isRawStringLiteral(Text))
74 return false;
75
76 return containsEscapes(Text, R"('\"?x01)");
77}
78
79bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
80 return Bytes.find(Delimiter.empty()
81 ? std::string(R"lit()")lit")
82 : (")" + Delimiter + R"(")")) != StringRef::npos;
83}
84
85std::string asRawStringLiteral(const StringLiteral *Literal,
86 const std::string &DelimiterStem) {
87 const StringRef Bytes = Literal->getBytes();
88 std::string Delimiter;
89 for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
90 Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
91 }
92
93 if (Delimiter.empty())
94 return (R"(R"()" + Bytes + R"lit()")lit").str();
95
96 return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str();
97}
98
99} // namespace
100
// Constructs the check, forwarding Name and Context to ClangTidyCheck, and
// reads its two options: "DelimiterStem" (default "lit") and
// "ReplaceShorterLiterals" (default false).
RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
                                             ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      DelimiterStem(Options.get("DelimiterStem", "lit")),
      ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {}
Richard Thomson8930aab2016-03-27 16:43:44 +0000106
107void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Options) {
108 ClangTidyCheck::storeOptions(Options);
Gabor Horvath3ac2ad7d6c2017-01-24 15:18:11 +0000109 this->Options.store(Options, "ReplaceShorterLiterals",
110 ReplaceShorterLiterals);
Richard Thomson8930aab2016-03-27 16:43:44 +0000111}
112
113void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
Gabor Horvathafad84c2016-09-24 02:13:45 +0000114 // Raw string literals require C++11 or later.
115 if (!getLangOpts().CPlusPlus11)
116 return;
117
Alexander Kornienkobfb43b72016-04-21 14:39:12 +0000118 Finder->addMatcher(
119 stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this);
Richard Thomson8930aab2016-03-27 16:43:44 +0000120}
121
122void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
Richard Thomson8930aab2016-03-27 16:43:44 +0000123 const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
124 if (Literal->getLocStart().isMacroID())
125 return;
126
Gabor Horvath3ac2ad7d6c2017-01-24 15:18:11 +0000127 if (containsEscapedCharacters(Result, Literal)) {
128 std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);
129 if (ReplaceShorterLiterals ||
130 Replacement.length() <=
131 Lexer::MeasureTokenLength(Literal->getLocStart(),
132 *Result.SourceManager, getLangOpts()))
133 replaceWithRawStringLiteral(Result, Literal, Replacement);
134 }
Richard Thomson8930aab2016-03-27 16:43:44 +0000135}
136
137void RawStringLiteralCheck::replaceWithRawStringLiteral(
Gabor Horvath3ac2ad7d6c2017-01-24 15:18:11 +0000138 const MatchFinder::MatchResult &Result, const StringLiteral *Literal,
139 StringRef Replacement) {
Richard Thomson8930aab2016-03-27 16:43:44 +0000140 CharSourceRange CharRange = Lexer::makeFileCharRange(
141 CharSourceRange::getTokenRange(Literal->getSourceRange()),
Gabor Horvathafad84c2016-09-24 02:13:45 +0000142 *Result.SourceManager, getLangOpts());
Richard Thomson8930aab2016-03-27 16:43:44 +0000143 diag(Literal->getLocStart(),
144 "escaped string literal can be written as a raw string literal")
Gabor Horvath3ac2ad7d6c2017-01-24 15:18:11 +0000145 << FixItHint::CreateReplacement(CharRange, Replacement);
Richard Thomson8930aab2016-03-27 16:43:44 +0000146}
147
148} // namespace modernize
149} // namespace tidy
150} // namespace clang