1 | //===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "RawStringLiteralCheck.h" |
10 | #include "clang/AST/ASTContext.h" |
11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
12 | #include "clang/Lex/Lexer.h" |
13 | |
14 | using namespace clang::ast_matchers; |
15 | |
16 | namespace clang::tidy::modernize { |
17 | |
18 | namespace { |
19 | |
20 | bool containsEscapes(StringRef HayStack, StringRef Escapes) { |
21 | size_t BackSlash = HayStack.find(C: '\\'); |
22 | if (BackSlash == StringRef::npos) |
23 | return false; |
24 | |
25 | while (BackSlash != StringRef::npos) { |
26 | if (!Escapes.contains(C: HayStack[BackSlash + 1])) |
27 | return false; |
28 | BackSlash = HayStack.find(C: '\\', From: BackSlash + 2); |
29 | } |
30 | |
31 | return true; |
32 | } |
33 | |
34 | bool isRawStringLiteral(StringRef Text) { |
35 | // Already a raw string literal if R comes before ". |
36 | const size_t QuotePos = Text.find(C: '"'); |
37 | assert(QuotePos != StringRef::npos); |
38 | return (QuotePos > 0) && (Text[QuotePos - 1] == 'R'); |
39 | } |
40 | |
41 | bool containsEscapedCharacters(const MatchFinder::MatchResult &Result, |
42 | const StringLiteral *Literal, |
43 | const CharsBitSet &DisallowedChars) { |
44 | // FIXME: Handle L"", u8"", u"" and U"" literals. |
45 | if (!Literal->isOrdinary()) |
46 | return false; |
47 | |
48 | for (const unsigned char C : Literal->getBytes()) |
49 | if (DisallowedChars.test(position: C)) |
50 | return false; |
51 | |
52 | CharSourceRange CharRange = Lexer::makeFileCharRange( |
53 | Range: CharSourceRange::getTokenRange(Literal->getSourceRange()), |
54 | SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts()); |
55 | StringRef Text = Lexer::getSourceText(Range: CharRange, SM: *Result.SourceManager, |
56 | LangOpts: Result.Context->getLangOpts()); |
57 | if (Text.empty() || isRawStringLiteral(Text)) |
58 | return false; |
59 | |
60 | return containsEscapes(HayStack: Text, Escapes: R"('\"?x01)" ); |
61 | } |
62 | |
63 | bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) { |
64 | return Bytes.find(Str: Delimiter.empty() |
65 | ? std::string(R"lit()")lit" ) |
66 | : (")" + Delimiter + R"(")" )) != StringRef::npos; |
67 | } |
68 | |
69 | std::string asRawStringLiteral(const StringLiteral *Literal, |
70 | const std::string &DelimiterStem) { |
71 | const StringRef Bytes = Literal->getBytes(); |
72 | std::string Delimiter; |
73 | for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) { |
74 | Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(val: I); |
75 | } |
76 | |
77 | if (Delimiter.empty()) |
78 | return (R"(R"()" + Bytes + R"lit()")lit" ).str(); |
79 | |
80 | return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" ).str(); |
81 | } |
82 | |
83 | } // namespace |
84 | |
85 | RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name, |
86 | ClangTidyContext *Context) |
87 | : ClangTidyCheck(Name, Context), |
88 | DelimiterStem(Options.get(LocalName: "DelimiterStem" , Default: "lit" )), |
89 | ReplaceShorterLiterals(Options.get(LocalName: "ReplaceShorterLiterals" , Default: false)) { |
90 | // Non-printing characters are disallowed: |
91 | // \007 = \a bell |
92 | // \010 = \b backspace |
93 | // \011 = \t horizontal tab |
94 | // \012 = \n new line |
95 | // \013 = \v vertical tab |
96 | // \014 = \f form feed |
97 | // \015 = \r carriage return |
98 | // \177 = delete |
99 | for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a" |
100 | "\b\t\n\v\f\r\016\017" |
101 | "\020\021\022\023\024\025\026\027" |
102 | "\030\031\032\033\034\035\036\037" |
103 | "\177" , |
104 | 33)) |
105 | DisallowedChars.set(position: C); |
106 | |
107 | // Non-ASCII are disallowed too. |
108 | for (unsigned int C = 0x80U; C <= 0xFFU; ++C) |
109 | DisallowedChars.set(position: static_cast<unsigned char>(C)); |
110 | } |
111 | |
112 | void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { |
113 | Options.store(Options&: Opts, LocalName: "DelimiterStem" , Value: DelimiterStem); |
114 | Options.store(Options&: Opts, LocalName: "ReplaceShorterLiterals" , Value: ReplaceShorterLiterals); |
115 | } |
116 | |
117 | void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) { |
118 | Finder->addMatcher( |
119 | NodeMatch: stringLiteral(unless(hasParent(predefinedExpr()))).bind(ID: "lit" ), Action: this); |
120 | } |
121 | |
122 | void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) { |
123 | const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>(ID: "lit" ); |
124 | if (Literal->getBeginLoc().isMacroID()) |
125 | return; |
126 | |
127 | if (containsEscapedCharacters(Result, Literal, DisallowedChars)) { |
128 | std::string Replacement = asRawStringLiteral(Literal, DelimiterStem); |
129 | if (ReplaceShorterLiterals || |
130 | Replacement.length() <= |
131 | Lexer::MeasureTokenLength(Loc: Literal->getBeginLoc(), |
132 | SM: *Result.SourceManager, LangOpts: getLangOpts())) |
133 | replaceWithRawStringLiteral(Result, Literal, Replacement); |
134 | } |
135 | } |
136 | |
137 | void RawStringLiteralCheck::replaceWithRawStringLiteral( |
138 | const MatchFinder::MatchResult &Result, const StringLiteral *Literal, |
139 | StringRef Replacement) { |
140 | CharSourceRange CharRange = Lexer::makeFileCharRange( |
141 | Range: CharSourceRange::getTokenRange(Literal->getSourceRange()), |
142 | SM: *Result.SourceManager, LangOpts: getLangOpts()); |
143 | diag(Loc: Literal->getBeginLoc(), |
144 | Description: "escaped string literal can be written as a raw string literal" ) |
145 | << FixItHint::CreateReplacement(RemoveRange: CharRange, Code: Replacement); |
146 | } |
147 | |
148 | } // namespace clang::tidy::modernize |
149 | |