RawStringLiteralCheck.cpp source code [clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp]

1	//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "RawStringLiteralCheck.h"
10	#include "clang/AST/ASTContext.h"
11	#include "clang/ASTMatchers/ASTMatchFinder.h"
12	#include "clang/Basic/LangOptions.h"
13	#include "clang/Basic/SourceManager.h"
14	#include "clang/Lex/Lexer.h"
15	#include "llvm/ADT/StringRef.h"
16	#include <optional>
17
18	using namespace clang::ast_matchers;
19
20	namespace clang::tidy::modernize {
21
22	namespace {
23
24	bool containsEscapes(StringRef HayStack, StringRef Escapes) {
25	size_t BackSlash = HayStack.find(C: `'\\'`);
26	if (BackSlash == StringRef::npos)
27	return false;
28
29	while (BackSlash != StringRef::npos) {
30	if (!Escapes.contains(C: HayStack [BackSlash + `1`]))
31	return false;
32	BackSlash = HayStack.find(C: `'\\'`, From: BackSlash + `2`);
33	}
34
35	return true;
36	}
37
38	bool isRawStringLiteral(StringRef Text) {
39	// Already a raw string literal if R comes before ".
40	const size_t QuotePos = Text.find(C: `'"'`);
41	assert(QuotePos != StringRef::npos);
42	return (QuotePos > `0`) && (Text [QuotePos - `1`] == `'R'`);
43	}
44
45	bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
46	const StringLiteral *Literal,
47	const CharsBitSet &DisallowedChars) {
48	// FIXME: Handle L"", u8"", u"" and U"" literals.
49	if (!Literal->isOrdinary())
50	return false;
51
52	for (const unsigned char C : Literal->getBytes())
53	if (DisallowedChars.test(position: C))
54	return false;
55
56	CharSourceRange CharRange = Lexer::makeFileCharRange(
57	Range: CharSourceRange::getTokenRange(Literal->getSourceRange()),
58	SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts());
59	StringRef Text = Lexer::getSourceText(Range: CharRange, SM: *Result.SourceManager,
60	LangOpts: Result.Context->getLangOpts());
61	if (Text.empty() \|\| isRawStringLiteral(Text))
62	return false;
63
64	return containsEscapes(HayStack: Text, Escapes: R"('\"?x01)");
65	}
66
67	bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
68	return Bytes.find(Str: Delimiter.empty()
69	? std::string (R"lit()")lit")
70	: (")" + Delimiter + R"(")")) != StringRef::npos;
71	}
72
73	} // namespace
74
75	RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
76	ClangTidyContext *Context)
77	: ClangTidyCheck (Name, Context),
78	DelimiterStem (Options.get(LocalName: "DelimiterStem", Default: "lit")),
79	ReplaceShorterLiterals(Options.get(LocalName: "ReplaceShorterLiterals", Default: false)) {
80	// Non-printing characters are disallowed:
81	// \007 = \a bell
82	// \010 = \b backspace
83	// \011 = \t horizontal tab
84	// \012 = \n new line
85	// \013 = \v vertical tab
86	// \014 = \f form feed
87	// \015 = \r carriage return
88	// \177 = delete
89	for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
90	"\b\t\n\v\f\r\016\017"
91	"\020\021\022\023\024\025\026\027"
92	"\030\031\032\033\034\035\036\037"
93	"\177",
94	`33`))
95	DisallowedChars.set(position: C);
96
97	// Non-ASCII are disallowed too.
98	for (unsigned int C = `0x80U`; C <= `0xFFU`; ++C)
99	DisallowedChars.set(position: static_cast<unsigned char>(C));
100	}
101
102	void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
103	Options.store(Options&: Opts, LocalName: "DelimiterStem", Value: DelimiterStem);
104	Options.store(Options&: Opts, LocalName: "ReplaceShorterLiterals", Value: ReplaceShorterLiterals);
105	}
106
107	void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
108	Finder->addMatcher(
109	NodeMatch: stringLiteral (unless (hasParent (predefinedExpr ()))).bind(ID: "lit"), Action: this);
110	}
111
112	static std::optional<StringRef>
113	createUserDefinedSuffix(const StringLiteral Literal, const* SourceManager &SM,
114	const LangOptions &LangOpts) {
115	const CharSourceRange TokenRange =
116	CharSourceRange::getTokenRange(Literal->getSourceRange());
117	Token T;
118	if (Lexer::getRawToken(Loc: Literal->getBeginLoc(), Result&: T, SM, LangOpts))
119	return std::nullopt;
120	const CharSourceRange CharRange =
121	Lexer::makeFileCharRange(Range: TokenRange, SM, LangOpts);
122	if (T.hasUDSuffix()) {
123	StringRef Text = Lexer::getSourceText(Range: CharRange, SM, LangOpts);
124	const size_t UDSuffixPos = Text.find_last_of(C: `'"'`);
125	if (UDSuffixPos == StringRef::npos)
126	return std::nullopt;
127	return Text.slice(Start: UDSuffixPos + `1`, End: Text.size());
128	}
129	return std::nullopt;
130	}
131
132	static std::string createRawStringLiteral(const StringLiteral *Literal,
133	const std::string &DelimiterStem,
134	const SourceManager &SM,
135	const LangOptions &LangOpts) {
136	const StringRef Bytes = Literal->getBytes();
137	std::string Delimiter;
138	for (int I = `0`; containsDelimiter(Bytes, Delimiter); ++I) {
139	Delimiter = (I == `0`) ? DelimiterStem : DelimiterStem + std::to_string(val: I);
140	}
141
142	std::optional<StringRef> UserDefinedSuffix =
143	createUserDefinedSuffix(Literal, SM, LangOpts);
144
145	if (Delimiter.empty())
146	return (R"(R"()" + Bytes + R"lit()")lit" + UserDefinedSuffix.value_or(u: ""))
147	.str();
148
149	return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" +
150	UserDefinedSuffix.value_or(u: ""))
151	.str();
152	}
153
154	static bool compareStringLength(StringRef Replacement,
155	const StringLiteral *Literal,
156	const SourceManager &SM,
157	const LangOptions &LangOpts) {
158	return Replacement.size() <=
159	Lexer::MeasureTokenLength(Loc: Literal->getBeginLoc(), SM, LangOpts);
160	}
161
162	void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
163	const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>(ID: "lit");
164	if (Literal->getBeginLoc().isMacroID())
165	return;
166	const SourceManager &SM = *Result.SourceManager;
167	const LangOptions &LangOpts = getLangOpts();
168	if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
169	const std::string Replacement =
170	createRawStringLiteral(Literal, DelimiterStem, SM, LangOpts);
171	if (ReplaceShorterLiterals \|\|
172	compareStringLength(Replacement, Literal, SM, LangOpts)) {
173	diag(Loc: Literal->getBeginLoc(),
174	Description: "escaped string literal can be written as a raw string literal")
175	<< FixItHint::CreateReplacement(Literal->getSourceRange(),
176	Replacement);
177	}
178	}
179	}
180
181	} // namespace clang::tidy::modernize
182

Provided by KDAB

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp