1//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "RawStringLiteralCheck.h"
10#include "clang/AST/ASTContext.h"
11#include "clang/ASTMatchers/ASTMatchFinder.h"
12#include "clang/Basic/LangOptions.h"
13#include "clang/Basic/SourceManager.h"
14#include "clang/Lex/Lexer.h"
15#include "llvm/ADT/StringRef.h"
16#include <optional>
17
18using namespace clang::ast_matchers;
19
20namespace clang::tidy::modernize {
21
22namespace {
23
24bool containsEscapes(StringRef HayStack, StringRef Escapes) {
25 size_t BackSlash = HayStack.find(C: '\\');
26 if (BackSlash == StringRef::npos)
27 return false;
28
29 while (BackSlash != StringRef::npos) {
30 if (!Escapes.contains(C: HayStack[BackSlash + 1]))
31 return false;
32 BackSlash = HayStack.find(C: '\\', From: BackSlash + 2);
33 }
34
35 return true;
36}
37
38bool isRawStringLiteral(StringRef Text) {
39 // Already a raw string literal if R comes before ".
40 const size_t QuotePos = Text.find(C: '"');
41 assert(QuotePos != StringRef::npos);
42 return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
43}
44
45bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
46 const StringLiteral *Literal,
47 const CharsBitSet &DisallowedChars) {
48 // FIXME: Handle L"", u8"", u"" and U"" literals.
49 if (!Literal->isOrdinary())
50 return false;
51
52 for (const unsigned char C : Literal->getBytes())
53 if (DisallowedChars.test(position: C))
54 return false;
55
56 CharSourceRange CharRange = Lexer::makeFileCharRange(
57 Range: CharSourceRange::getTokenRange(Literal->getSourceRange()),
58 SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts());
59 StringRef Text = Lexer::getSourceText(Range: CharRange, SM: *Result.SourceManager,
60 LangOpts: Result.Context->getLangOpts());
61 if (Text.empty() || isRawStringLiteral(Text))
62 return false;
63
64 return containsEscapes(HayStack: Text, Escapes: R"('\"?x01)");
65}
66
67bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
68 return Bytes.find(Str: Delimiter.empty()
69 ? std::string(R"lit()")lit")
70 : (")" + Delimiter + R"(")")) != StringRef::npos;
71}
72
73} // namespace
74
75RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
76 ClangTidyContext *Context)
77 : ClangTidyCheck(Name, Context),
78 DelimiterStem(Options.get(LocalName: "DelimiterStem", Default: "lit")),
79 ReplaceShorterLiterals(Options.get(LocalName: "ReplaceShorterLiterals", Default: false)) {
80 // Non-printing characters are disallowed:
81 // \007 = \a bell
82 // \010 = \b backspace
83 // \011 = \t horizontal tab
84 // \012 = \n new line
85 // \013 = \v vertical tab
86 // \014 = \f form feed
87 // \015 = \r carriage return
88 // \177 = delete
89 for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
90 "\b\t\n\v\f\r\016\017"
91 "\020\021\022\023\024\025\026\027"
92 "\030\031\032\033\034\035\036\037"
93 "\177",
94 33))
95 DisallowedChars.set(position: C);
96
97 // Non-ASCII are disallowed too.
98 for (unsigned int C = 0x80U; C <= 0xFFU; ++C)
99 DisallowedChars.set(position: static_cast<unsigned char>(C));
100}
101
102void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
103 Options.store(Options&: Opts, LocalName: "DelimiterStem", Value: DelimiterStem);
104 Options.store(Options&: Opts, LocalName: "ReplaceShorterLiterals", Value: ReplaceShorterLiterals);
105}
106
107void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
108 Finder->addMatcher(
109 NodeMatch: stringLiteral(unless(hasParent(predefinedExpr()))).bind(ID: "lit"), Action: this);
110}
111
112static std::optional<StringRef>
113createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM,
114 const LangOptions &LangOpts) {
115 const CharSourceRange TokenRange =
116 CharSourceRange::getTokenRange(Literal->getSourceRange());
117 Token T;
118 if (Lexer::getRawToken(Loc: Literal->getBeginLoc(), Result&: T, SM, LangOpts))
119 return std::nullopt;
120 const CharSourceRange CharRange =
121 Lexer::makeFileCharRange(Range: TokenRange, SM, LangOpts);
122 if (T.hasUDSuffix()) {
123 StringRef Text = Lexer::getSourceText(Range: CharRange, SM, LangOpts);
124 const size_t UDSuffixPos = Text.find_last_of(C: '"');
125 if (UDSuffixPos == StringRef::npos)
126 return std::nullopt;
127 return Text.slice(Start: UDSuffixPos + 1, End: Text.size());
128 }
129 return std::nullopt;
130}
131
132static std::string createRawStringLiteral(const StringLiteral *Literal,
133 const std::string &DelimiterStem,
134 const SourceManager &SM,
135 const LangOptions &LangOpts) {
136 const StringRef Bytes = Literal->getBytes();
137 std::string Delimiter;
138 for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
139 Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(val: I);
140 }
141
142 std::optional<StringRef> UserDefinedSuffix =
143 createUserDefinedSuffix(Literal, SM, LangOpts);
144
145 if (Delimiter.empty())
146 return (R"(R"()" + Bytes + R"lit()")lit" + UserDefinedSuffix.value_or(u: ""))
147 .str();
148
149 return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" +
150 UserDefinedSuffix.value_or(u: ""))
151 .str();
152}
153
154static bool compareStringLength(StringRef Replacement,
155 const StringLiteral *Literal,
156 const SourceManager &SM,
157 const LangOptions &LangOpts) {
158 return Replacement.size() <=
159 Lexer::MeasureTokenLength(Loc: Literal->getBeginLoc(), SM, LangOpts);
160}
161
162void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
163 const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>(ID: "lit");
164 if (Literal->getBeginLoc().isMacroID())
165 return;
166 const SourceManager &SM = *Result.SourceManager;
167 const LangOptions &LangOpts = getLangOpts();
168 if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
169 const std::string Replacement =
170 createRawStringLiteral(Literal, DelimiterStem, SM, LangOpts);
171 if (ReplaceShorterLiterals ||
172 compareStringLength(Replacement, Literal, SM, LangOpts)) {
173 diag(Loc: Literal->getBeginLoc(),
174 Description: "escaped string literal can be written as a raw string literal")
175 << FixItHint::CreateReplacement(Literal->getSourceRange(),
176 Replacement);
177 }
178 }
179}
180
181} // namespace clang::tidy::modernize
182

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp