FormatStringConverter.cpp source code [clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp]

1	//===--- FormatStringConverter.cpp - clang-tidy----------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// Implementation of the FormatStringConverter class which is used to convert
11	/// printf format strings to C++ std::formatter format strings.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#include "FormatStringConverter.h"
16	#include "../utils/FixItHintUtils.h"
17	#include "clang/AST/Expr.h"
18	#include "clang/ASTMatchers/ASTMatchFinder.h"
19	#include "clang/Basic/LangOptions.h"
20	#include "clang/Lex/Lexer.h"
21	#include "clang/Lex/Preprocessor.h"
22	#include "clang/Tooling/FixIt.h"
23	#include "llvm/ADT/StringExtras.h"
24	#include "llvm/Support/Debug.h"
25
26	using namespace clang::ast_matchers;
27	using namespace clang::analyze_printf;
28
29	namespace clang::tidy::utils {
30	using clang::analyze_format_string::ConversionSpecifier;
31
32	/// Is the passed type the actual "char" type, whether that be signed or
33	/// unsigned, rather than explicit signed char or unsigned char types.
34	static bool isRealCharType(const clang::QualType &Ty) {
35	using namespace clang;
36	const Type *DesugaredType = Ty ->getUnqualifiedDesugaredType();
37	if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: DesugaredType))
38	return (BT->getKind() == BuiltinType::Char_U \|\|
39	BT->getKind() == BuiltinType::Char_S);
40	return false;
41	}
42
43	/// If possible, return the text name of the signed type that corresponds to the
44	/// passed integer type. If the passed type is already signed then its name is
45	/// just returned. Only supports BuiltinTypes.
46	static std::optional<std::string>
47	getCorrespondingSignedTypeName(const clang::QualType &QT) {
48	using namespace clang;
49	const auto UQT = QT.getUnqualifiedType();
50	if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) {
51	switch (BT->getKind()) {
52	case BuiltinType::UChar:
53	case BuiltinType::Char_U:
54	case BuiltinType::SChar:
55	case BuiltinType::Char_S:
56	return "signed char";
57	case BuiltinType::UShort:
58	case BuiltinType::Short:
59	return "short";
60	case BuiltinType::UInt:
61	case BuiltinType::Int:
62	return "int";
63	case BuiltinType::ULong:
64	case BuiltinType::Long:
65	return "long";
66	case BuiltinType::ULongLong:
67	case BuiltinType::LongLong:
68	return "long long";
69	default:
70	llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
71	<< QT.getAsString() << "'\n";
72	return std::nullopt;
73	}
74	}
75
76	// Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
77	// if the argument type does.
78	const std::string TypeName = UQT.getAsString();
79	StringRef SimplifiedTypeName{TypeName};
80	const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::");
81	const StringRef Prefix = InStd ? "std::" : "";
82
83	if (SimplifiedTypeName.starts_with(Prefix: "uint") &&
84	SimplifiedTypeName.ends_with(Suffix: "_t"))
85	return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
86
87	if (SimplifiedTypeName == "size_t")
88	return (Twine(Prefix) + "ssize_t").str();
89
90	llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
91	<< UQT.getAsString() << "'\n";
92	return std::nullopt;
93	}
94
95	/// If possible, return the text name of the unsigned type that corresponds to
96	/// the passed integer type. If the passed type is already unsigned then its
97	/// name is just returned. Only supports BuiltinTypes.
98	static std::optional<std::string>
99	getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
100	using namespace clang;
101	const auto UQT = QT.getUnqualifiedType();
102	if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) {
103	switch (BT->getKind()) {
104	case BuiltinType::SChar:
105	case BuiltinType::Char_S:
106	case BuiltinType::UChar:
107	case BuiltinType::Char_U:
108	return "unsigned char";
109	case BuiltinType::Short:
110	case BuiltinType::UShort:
111	return "unsigned short";
112	case BuiltinType::Int:
113	case BuiltinType::UInt:
114	return "unsigned int";
115	case BuiltinType::Long:
116	case BuiltinType::ULong:
117	return "unsigned long";
118	case BuiltinType::LongLong:
119	case BuiltinType::ULongLong:
120	return "unsigned long long";
121	default:
122	llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
123	<< UQT.getAsString() << "'\n";
124	return std::nullopt;
125	}
126	}
127
128	// Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
129	// if the argument type does.
130	const std::string TypeName = UQT.getAsString();
131	StringRef SimplifiedTypeName{TypeName};
132	const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::");
133	const StringRef Prefix = InStd ? "std::" : "";
134
135	if (SimplifiedTypeName.starts_with(Prefix: "int") &&
136	SimplifiedTypeName.ends_with(Suffix: "_t"))
137	return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
138
139	if (SimplifiedTypeName == "ssize_t")
140	return (Twine(Prefix) + "size_t").str();
141	if (SimplifiedTypeName == "ptrdiff_t")
142	return (Twine(Prefix) + "size_t").str();
143
144	llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
145	<< UQT.getAsString() << "'\n";
146	return std::nullopt;
147	}
148
149	static std::optional<std::string>
150	castTypeForArgument(ConversionSpecifier::Kind ArgKind,
151	const clang::QualType &QT) {
152	if (ArgKind == ConversionSpecifier::Kind::uArg)
153	return getCorrespondingUnsignedTypeName(QT);
154	return getCorrespondingSignedTypeName(QT);
155	}
156
157	static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
158	const clang::QualType &ArgType) {
159	if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: ArgType)) {
160	// Unadorned char never matches any expected signedness since it
161	// could be signed or unsigned.
162	const auto ArgTypeKind = BT->getKind();
163	if (ArgTypeKind == BuiltinType::Char_U \|\|
164	ArgTypeKind == BuiltinType::Char_S)
165	return false;
166	}
167
168	if (ArgKind == ConversionSpecifier::Kind::uArg)
169	return ArgType ->isUnsignedIntegerType();
170	return ArgType ->isSignedIntegerType();
171	}
172
173	namespace {
174	AST_MATCHER(clang::QualType, isRealChar) {
175	return clang::tidy::utils::isRealCharType(Ty: Node);
176	}
177	} // namespace
178
179	static bool castMismatchedIntegerTypes(const CallExpr Call, bool* StrictMode) {
180	/// For printf-style functions, the signedness of the type printed is
181	/// indicated by the corresponding type in the format string.
182	/// std::print will determine the signedness from the type of the
183	/// argument. This means that it is necessary to generate a cast in
184	/// StrictMode to ensure that the exact behaviour is maintained.
185	/// However, for templated functions like absl::PrintF and
186	/// fmt::printf, the signedness of the type printed is also taken from
187	/// the actual argument like std::print, so such casts are never
188	/// necessary. printf-style functions are variadic, whereas templated
189	/// ones aren't, so we can use that to distinguish between the two
190	/// cases.
191	if (StrictMode) {
192	const FunctionDecl *FuncDecl = Call->getDirectCallee();
193	assert(FuncDecl);
194	return FuncDecl->isVariadic();
195	}
196	return false;
197	}
198
199	FormatStringConverter::FormatStringConverter(
200	ASTContext ContextIn, const* CallExpr Call, unsigned* FormatArgOffset,
201	const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
202	Preprocessor &PP)
203	: Context(ContextIn), Config (ConfigIn),
204	CastMismatchedIntegerTypes(
205	castMismatchedIntegerTypes(Call, StrictMode: ConfigIn.StrictMode)),
206	Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
207	ArgsOffset(FormatArgOffset + `1`), LangOpts(LO) {
208	assert(ArgsOffset <= NumArgs);
209	FormatExpr = llvm::dyn_cast<StringLiteral>(
210	Val: Args[FormatArgOffset]->IgnoreImplicitAsWritten());
211
212	if (!FormatExpr \|\| !FormatExpr->isOrdinary()) {
213	// Function must have a narrow string literal as its first argument.
214	conversionNotPossible(Reason: "first argument is not a narrow string literal");
215	return;
216	}
217
218	if (const std::optional<StringRef> MaybeMacroName =
219	formatStringContainsUnreplaceableMacro(CallExpr: Call, FormatExpr, SM, PP);
220	MaybeMacroName) {
221	conversionNotPossible(
222	Reason: ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
223	.str());
224	return;
225	}
226
227	PrintfFormatString = FormatExpr->getString();
228
229	// Assume that the output will be approximately the same size as the input,
230	// but perhaps with a few escapes expanded.
231	const size_t EstimatedGrowth = `8`;
232	StandardFormatString.reserve(res: PrintfFormatString.size() + EstimatedGrowth);
233	StandardFormatString.push_back(c: `'\"'`);
234
235	const bool IsFreeBsdkPrintf = false;
236
237	using clang::analyze_format_string::ParsePrintfString;
238	ParsePrintfString(H&: *this, beg: PrintfFormatString.data(),
239	end: PrintfFormatString.data() + PrintfFormatString.size(),
240	LO: LangOpts, Target: Context->getTargetInfo(), isFreeBSDKPrintf: IsFreeBsdkPrintf);
241	finalizeFormatText();
242	}
243
244	std::optional<StringRef>
245	FormatStringConverter::formatStringContainsUnreplaceableMacro(
246	const CallExpr Call, const* StringLiteral *FormatExpr, SourceManager &SM,
247	Preprocessor &PP) {
248	// If a macro invocation surrounds the entire call then we don't want that to
249	// inhibit conversion. The whole format string will appear to come from that
250	// macro, as will the function call.
251	std::optional<StringRef> MaybeSurroundingMacroName;
252	if (SourceLocation BeginCallLoc = Call->getBeginLoc();
253	BeginCallLoc.isMacroID())
254	MaybeSurroundingMacroName =
255	Lexer::getImmediateMacroName(Loc: BeginCallLoc, SM, LangOpts: PP.getLangOpts());
256
257	for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
258	I != E; ++I) {
259	const SourceLocation &TokenLoc = *I;
260	if (TokenLoc.isMacroID()) {
261	const StringRef MacroName =
262	Lexer::getImmediateMacroName(Loc: TokenLoc, SM, LangOpts: PP.getLangOpts());
263
264	if (MaybeSurroundingMacroName != MacroName) {
265	// glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
266	// for types that change size so we must look for multiple prefixes.
267	if (!MacroName.starts_with(Prefix: "PRI") && !MacroName.starts_with(Prefix: "__PRI"))
268	return MacroName;
269
270	const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(Loc: TokenLoc);
271	const OptionalFileEntryRef MaybeFileEntry =
272	SM.getFileEntryRefForID(FID: SM.getFileID(SpellingLoc: TokenSpellingLoc));
273	if (!MaybeFileEntry)
274	return MacroName;
275
276	HeaderSearch &HS = PP.getHeaderSearchInfo();
277	// Check if the file is a system header
278	if (!isSystem(CK: HS.getFileDirFlavor(File: *MaybeFileEntry)) \|\|
279	llvm::sys::path::filename(path: MaybeFileEntry ->getName()) !=
280	"inttypes.h")
281	return MacroName;
282	}
283	}
284	}
285	return std::nullopt;
286	}
287
288	void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
289	std::string &FormatSpec) {
290	ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
291
292	// We only care about alignment if a field width is specified
293	if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
294	if (ArgKind == ConversionSpecifier::sArg) {
295	// Strings are left-aligned by default with std::format, so we only
296	// need to emit an alignment if this one needs to be right aligned.
297	if (!FS.isLeftJustified())
298	FormatSpec.push_back(c: `'>'`);
299	} else {
300	// Numbers are right-aligned by default with std::format, so we only
301	// need to emit an alignment if this one needs to be left aligned.
302	if (FS.isLeftJustified())
303	FormatSpec.push_back(c: `'<'`);
304	}
305	}
306	}
307
308	void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
309	std::string &FormatSpec) {
310	const ConversionSpecifier Spec = FS.getConversionSpecifier();
311
312	// Ignore on something that isn't numeric. For printf it's would be a
313	// compile-time warning but ignored at runtime, but for std::format it
314	// ought to be a compile-time error.
315	if (Spec.isAnyIntArg() \|\| Spec.isDoubleArg()) {
316	// + is preferred to ' '
317	if (FS.hasPlusPrefix())
318	FormatSpec.push_back(c: `'+'`);
319	else if (FS.hasSpacePrefix())
320	FormatSpec.push_back(c: `' '`);
321	}
322	}
323
324	void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
325	std::string &FormatSpec) {
326	if (FS.hasAlternativeForm()) {
327	switch (FS.getConversionSpecifier().getKind()) {
328	case ConversionSpecifier::Kind::aArg:
329	case ConversionSpecifier::Kind::AArg:
330	case ConversionSpecifier::Kind::eArg:
331	case ConversionSpecifier::Kind::EArg:
332	case ConversionSpecifier::Kind::fArg:
333	case ConversionSpecifier::Kind::FArg:
334	case ConversionSpecifier::Kind::gArg:
335	case ConversionSpecifier::Kind::GArg:
336	case ConversionSpecifier::Kind::xArg:
337	case ConversionSpecifier::Kind::XArg:
338	case ConversionSpecifier::Kind::oArg:
339	FormatSpec.push_back(c: `'#'`);
340	break;
341	default:
342	// Alternative forms don't exist for other argument kinds
343	break;
344	}
345	}
346	}
347
348	void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
349	std::string &FormatSpec) {
350	{
351	const OptionalAmount FieldWidth = FS.getFieldWidth();
352	switch (FieldWidth.getHowSpecified()) {
353	case OptionalAmount::NotSpecified:
354	break;
355	case OptionalAmount::Constant:
356	FormatSpec.append(str: llvm::utostr(X: FieldWidth.getConstantAmount()));
357	break;
358	case OptionalAmount::Arg:
359	FormatSpec.push_back(c: `'{'`);
360	if (FieldWidth.usesPositionalArg()) {
361	// std::format argument identifiers are zero-based, whereas printf
362	// ones are one based.
363	assert(FieldWidth.getPositionalArgIndex() > `0U`);
364	FormatSpec.append(str: llvm::utostr(X: FieldWidth.getPositionalArgIndex() - `1`));
365	}
366	FormatSpec.push_back(c: `'}'`);
367	break;
368	case OptionalAmount::Invalid:
369	break;
370	}
371	}
372	}
373
374	void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
375	std::string &FormatSpec) {
376	const OptionalAmount FieldPrecision = FS.getPrecision();
377	switch (FieldPrecision.getHowSpecified()) {
378	case OptionalAmount::NotSpecified:
379	break;
380	case OptionalAmount::Constant:
381	FormatSpec.push_back(c: `'.'`);
382	FormatSpec.append(str: llvm::utostr(X: FieldPrecision.getConstantAmount()));
383	break;
384	case OptionalAmount::Arg:
385	FormatSpec.push_back(c: `'.'`);
386	FormatSpec.push_back(c: `'{'`);
387	if (FieldPrecision.usesPositionalArg()) {
388	// std::format argument identifiers are zero-based, whereas printf
389	// ones are one based.
390	assert(FieldPrecision.getPositionalArgIndex() > `0U`);
391	FormatSpec.append(
392	str: llvm::utostr(X: FieldPrecision.getPositionalArgIndex() - `1`));
393	}
394	FormatSpec.push_back(c: `'}'`);
395	break;
396	case OptionalAmount::Invalid:
397	break;
398	}
399	}
400
401	void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
402	unsigned ArgCount = `0`;
403	const OptionalAmount FieldWidth = FS.getFieldWidth();
404	const OptionalAmount FieldPrecision = FS.getPrecision();
405
406	if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
407	!FieldWidth.usesPositionalArg())
408	++ArgCount;
409	if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
410	!FieldPrecision.usesPositionalArg())
411	++ArgCount;
412
413	if (ArgCount)
414	ArgRotates.emplace_back(args: FS.getArgIndex() + ArgsOffset, args&: ArgCount);
415	}
416
417	void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
418	const Expr *Arg) {
419	// If the argument is the result of a call to std::string::c_str() or
420	// data() with a return type of char then we can remove that call and
421	// pass the std::string directly. We don't want to do so if the return
422	// type is not a char pointer (though it's unlikely that such code would
423	// compile without warnings anyway.) See RedundantStringCStrCheck.
424
425	if (!StringCStrCallExprMatcher) {
426	// Lazily create the matcher
427	const auto StringDecl = type (hasUnqualifiedDesugaredType(InnerMatcher: recordType (
428	hasDeclaration(InnerMatcher: cxxRecordDecl (hasName(Name: "::std::basic_string"))))));
429	const auto StringExpr = expr (
430	anyOf (hasType(InnerMatcher: StringDecl), hasType(InnerMatcher: qualType (pointsTo(InnerMatcher: StringDecl)))));
431
432	StringCStrCallExprMatcher =
433	cxxMemberCallExpr (
434	on(InnerMatcher: StringExpr.bind(ID: "arg")), callee(InnerMatcher: memberExpr ().bind(ID: "member")),
435	callee(InnerMatcher: cxxMethodDecl (hasAnyName ("c_str", "data"),
436	returns(InnerMatcher: pointerType (pointee (isRealChar()))))))
437	.bind(ID: "call");
438	}
439
440	auto CStrMatches = match(Matcher: StringCStrCallExprMatcher, Node: Arg, Context&: *Context);
441	if (CStrMatches.size() == `1`)
442	ArgCStrRemovals.push_back(x: CStrMatches.front());
443	else if (Arg->getType()->isPointerType()) {
444	const QualType Pointee = Arg->getType()->getPointeeType();
445	// printf is happy to print signed char and unsigned char strings, but
446	// std::format only likes char strings.
447	if (Pointee ->isCharType() && !isRealCharType(Ty: Pointee))
448	ArgFixes.emplace_back(args&: ArgIndex, args: "reinterpret_cast<const char *>(");
449	}
450	}
451
452	bool FormatStringConverter::emitIntegerArgument(
453	ConversionSpecifier::Kind ArgKind, const Expr Arg, unsigned* ArgIndex,
454	std::string &FormatSpec) {
455	const clang::QualType &ArgType = Arg->getType();
456	if (ArgType ->isBooleanType()) {
457	// std::format will print bool as either "true" or "false" by default,
458	// but printf prints them as "0" or "1". Be compatible with printf by
459	// requesting decimal output.
460	FormatSpec.push_back(c: `'d'`);
461	} else if (ArgType ->isEnumeralType()) {
462	// std::format will try to find a specialization to print the enum
463	// (and probably fail), whereas printf would have just expected it to
464	// be passed as its underlying type. However, printf will have forced
465	// the signedness based on the format string, so we need to do the
466	// same.
467	if (const auto *ET = ArgType ->getAs<EnumType>()) {
468	if (const std::optional<std::string> MaybeCastType =
469	castTypeForArgument(ArgKind, QT: ET->getDecl()->getIntegerType()))
470	ArgFixes.emplace_back(
471	args&: ArgIndex, args: (Twine("static_cast<") + *MaybeCastType + ">(").str());
472	else
473	return conversionNotPossible(
474	Reason: (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
475	.str());
476	}
477	} else if (CastMismatchedIntegerTypes &&
478	!isMatchingSignedness(ArgKind, ArgType)) {
479	// printf will happily print an unsigned type as signed if told to.
480	// Even -Wformat doesn't warn for this. std::format will format as
481	// unsigned unless we cast it.
482	if (const std::optional<std::string> MaybeCastType =
483	castTypeForArgument(ArgKind, QT: ArgType))
484	ArgFixes.emplace_back(
485	args&: ArgIndex, args: (Twine("static_cast<") + *MaybeCastType + ">(").str());
486	else
487	return conversionNotPossible(
488	Reason: (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
489	Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
490	: "signed") +
491	" integer type to match format"
492	" specifier and StrictMode is enabled")
493	.str());
494	} else if (isRealCharType(Ty: ArgType) \|\| !ArgType ->isIntegerType()) {
495	// Only specify integer if the argument is of a different type
496	FormatSpec.push_back(c: `'d'`);
497	}
498	return true;
499	}
500
501	/// Append the corresponding standard format string type fragment to FormatSpec,
502	/// and store any argument fixes for later application.
503	/// @returns true on success, false on failure
504	bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
505	std::string &FormatSpec) {
506	ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
507	switch (ArgKind) {
508	case ConversionSpecifier::Kind::sArg:
509	emitStringArgument(ArgIndex: FS.getArgIndex() + ArgsOffset, Arg);
510	break;
511	case ConversionSpecifier::Kind::cArg:
512	// The type must be "c" to get a character unless the type is exactly
513	// char (whether that be signed or unsigned for the target.)
514	if (!isRealCharType(Ty: Arg->getType()))
515	FormatSpec.push_back(c: `'c'`);
516	break;
517	case ConversionSpecifier::Kind::dArg:
518	case ConversionSpecifier::Kind::iArg:
519	case ConversionSpecifier::Kind::uArg:
520	if (!emitIntegerArgument(ArgKind, Arg, ArgIndex: FS.getArgIndex() + ArgsOffset,
521	FormatSpec))
522	return false;
523	break;
524	case ConversionSpecifier::Kind::pArg: {
525	const clang::QualType &ArgType = Arg->getType();
526	// std::format knows how to format void pointers and nullptrs
527	if (!ArgType ->isNullPtrType() && !ArgType ->isVoidPointerType())
528	ArgFixes.emplace_back(args: FS.getArgIndex() + ArgsOffset,
529	args: "static_cast<const void *>(");
530	break;
531	}
532	case ConversionSpecifier::Kind::xArg:
533	FormatSpec.push_back(c: `'x'`);
534	break;
535	case ConversionSpecifier::Kind::XArg:
536	FormatSpec.push_back(c: `'X'`);
537	break;
538	case ConversionSpecifier::Kind::oArg:
539	FormatSpec.push_back(c: `'o'`);
540	break;
541	case ConversionSpecifier::Kind::aArg:
542	FormatSpec.push_back(c: `'a'`);
543	break;
544	case ConversionSpecifier::Kind::AArg:
545	FormatSpec.push_back(c: `'A'`);
546	break;
547	case ConversionSpecifier::Kind::eArg:
548	FormatSpec.push_back(c: `'e'`);
549	break;
550	case ConversionSpecifier::Kind::EArg:
551	FormatSpec.push_back(c: `'E'`);
552	break;
553	case ConversionSpecifier::Kind::fArg:
554	FormatSpec.push_back(c: `'f'`);
555	break;
556	case ConversionSpecifier::Kind::FArg:
557	FormatSpec.push_back(c: `'F'`);
558	break;
559	case ConversionSpecifier::Kind::gArg:
560	FormatSpec.push_back(c: `'g'`);
561	break;
562	case ConversionSpecifier::Kind::GArg:
563	FormatSpec.push_back(c: `'G'`);
564	break;
565	default:
566	// Something we don't understand
567	return conversionNotPossible(Reason: (Twine("argument ") +
568	Twine(FS.getArgIndex() + ArgsOffset) +
569	" has an unsupported format specifier")
570	.str());
571	}
572
573	return true;
574	}
575
576	/// Append the standard format string equivalent of the passed PrintfSpecifier
577	/// to StandardFormatString and store any argument fixes for later application.
578	/// @returns true on success, false on failure
579	bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
580	const Expr *Arg,
581	std::string &StandardFormatString) {
582	// The specifier must have an associated argument
583	assert(FS.consumesDataArgument());
584
585	StandardFormatString.push_back(c: `'{'`);
586
587	if (FS.usesPositionalArg()) {
588	// std::format argument identifiers are zero-based, whereas printf ones
589	// are one based.
590	assert(FS.getPositionalArgIndex() > `0U`);
591	StandardFormatString.append(str: llvm::utostr(X: FS.getPositionalArgIndex() - `1`));
592	}
593
594	// std::format format argument parts to potentially emit:
595	// [[fill]align][sign]["#"]["0"][width]["."precision][type]
596	std::string FormatSpec;
597
598	// printf doesn't support specifying the fill character - it's always a
599	// space, so we never need to generate one.
600
601	emitAlignment(FS, FormatSpec);
602	emitSign(FS, FormatSpec);
603	emitAlternativeForm(FS, FormatSpec);
604
605	if (FS.hasLeadingZeros())
606	FormatSpec.push_back(c: `'0'`);
607
608	emitFieldWidth(FS, FormatSpec);
609	emitPrecision(FS, FormatSpec);
610	maybeRotateArguments(FS);
611
612	if (!emitType(FS, Arg, FormatSpec))
613	return false;
614
615	if (!FormatSpec.empty()) {
616	StandardFormatString.push_back(c: `':'`);
617	StandardFormatString.append(str: FormatSpec);
618	}
619
620	StandardFormatString.push_back(c: `'}'`);
621	return true;
622	}
623
624	/// Called for each format specifier by ParsePrintfString.
625	bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
626	const char *StartSpecifier,
627	unsigned SpecifierLen,
628	const TargetInfo &Target) {
629
630	const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
631	assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
632
633	// Everything before the specifier needs copying verbatim
634	assert(StartSpecifierPos >= PrintfFormatStringPos);
635
636	appendFormatText(Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
637	StartSpecifierPos - PrintfFormatStringPos));
638
639	const ConversionSpecifier::Kind ArgKind =
640	FS.getConversionSpecifier().getKind();
641
642	// Skip over specifier
643	PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
644	assert(PrintfFormatStringPos <= PrintfFormatString.size());
645
646	FormatStringNeededRewriting = true;
647
648	if (ArgKind == ConversionSpecifier::Kind::nArg) {
649	// std::print doesn't do the equivalent of %n
650	return conversionNotPossible(Reason: "'%n' is not supported in format string");
651	}
652
653	if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
654	// std::print doesn't support %m. In theory we could insert a
655	// strerror(errno) parameter (assuming that libc has a thread-safe
656	// implementation, which glibc does), but that would require keeping track
657	// of the input and output parameter indices for position arguments too.
658	return conversionNotPossible(Reason: "'%m' is not supported in format string");
659	}
660
661	if (ArgKind == ConversionSpecifier::PercentArg) {
662	StandardFormatString.push_back(c: `'%'`);
663	return true;
664	}
665
666	const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
667	if (ArgIndex >= NumArgs) {
668	// Argument index out of range. Give up.
669	return conversionNotPossible(
670	Reason: (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
671	.str());
672	}
673
674	return convertArgument(FS, Arg: Args[ArgIndex]->IgnoreImplicitAsWritten(),
675	StandardFormatString);
676	}
677
678	/// Called at the very end just before applying fixes to capture the last part
679	/// of the format string.
680	void FormatStringConverter::finalizeFormatText() {
681	appendFormatText(
682	Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
683	PrintfFormatString.size() - PrintfFormatStringPos));
684	PrintfFormatStringPos = PrintfFormatString.size();
685
686	// It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
687	// than to std::println("Hello\r");
688	// Use StringRef until C++20 std::string::ends_with() is available.
689	const auto StandardFormatStringRef = StringRef(StandardFormatString);
690	if (Config.AllowTrailingNewlineRemoval &&
691	StandardFormatStringRef.ends_with(Suffix: "\\n") &&
692	!StandardFormatStringRef.ends_with(Suffix: "\\\\n") &&
693	!StandardFormatStringRef.ends_with(Suffix: "\\r\\n")) {
694	UsePrintNewlineFunction = true;
695	FormatStringNeededRewriting = true;
696	StandardFormatString.erase(first: StandardFormatString.end() - `2`,
697	last: StandardFormatString.end());
698	}
699
700	StandardFormatString.push_back(c: `'\"'`);
701	}
702
703	/// Append literal parts of the format text, reinstating escapes as required.
704	void FormatStringConverter::appendFormatText(const StringRef Text) {
705	for (const char Ch : Text) {
706	if (Ch == `'\a'`)
707	StandardFormatString += "\\a";
708	else if (Ch == `'\b'`)
709	StandardFormatString += "\\b";
710	else if (Ch == `'\f'`)
711	StandardFormatString += "\\f";
712	else if (Ch == `'\n'`)
713	StandardFormatString += "\\n";
714	else if (Ch == `'\r'`)
715	StandardFormatString += "\\r";
716	else if (Ch == `'\t'`)
717	StandardFormatString += "\\t";
718	else if (Ch == `'\v'`)
719	StandardFormatString += "\\v";
720	else if (Ch == `'\"'`)
721	StandardFormatString += "\\\"";
722	else if (Ch == `'\\'`)
723	StandardFormatString += "\\\\";
724	else if (Ch == `'{'`) {
725	StandardFormatString += "{{";
726	FormatStringNeededRewriting = true;
727	} else if (Ch == `'}'`) {
728	StandardFormatString += "}}";
729	FormatStringNeededRewriting = true;
730	} else if (Ch < `32`) {
731	StandardFormatString += "\\x";
732	StandardFormatString += llvm::hexdigit(X: Ch >> `4`, LowerCase: true);
733	StandardFormatString += llvm::hexdigit(X: Ch & `0xf`, LowerCase: true);
734	} else
735	StandardFormatString += Ch;
736	}
737	}
738
739	static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
740	ASTContext &Context) {
741	const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>(ID: "arg");
742	const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>(ID: "member");
743	const bool Arrow = Member->isArrow();
744	return Arrow ? utils::fixit::formatDereference(ExprNode: *Arg, Context)
745	: tooling::fixit::getText(Node: *Arg, Context).str();
746	}
747
748	/// Called by the check when it is ready to apply the fixes.
749	void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
750	SourceManager &SM) {
751	if (FormatStringNeededRewriting) {
752	Diag << FixItHint::CreateReplacement(
753	RemoveRange: CharSourceRange::getTokenRange(B: FormatExpr->getBeginLoc(),
754	E: FormatExpr->getEndLoc()),
755	Code: StandardFormatString);
756	}
757
758	// ArgCount is one less than the number of arguments to be rotated.
759	for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
760	assert(ValueArgIndex < NumArgs);
761	assert(ValueArgIndex > ArgCount);
762
763	// First move the value argument to the right place. But if there's a
764	// pending c_str() removal then we must do that at the same time.
765	if (const auto CStrRemovalMatch =
766	std::find_if(first: ArgCStrRemovals.cbegin(), last: ArgCStrRemovals.cend(),
767	pred: [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
768	const BoundNodes &Match) {
769	// This c_str() removal corresponds to the argument
770	// being moved if they start at the same location.
771	const Expr *CStrArg = Match.getNodeAs<Expr>(ID: "arg");
772	return ArgStartPos == CStrArg->getBeginLoc();
773	});
774	CStrRemovalMatch != ArgCStrRemovals.end()) {
775	const std::string ArgText =
776	withoutCStrReplacement(CStrRemovalMatch: CStrRemovalMatch, Context&: Context);
777	assert(!ArgText.empty());
778
779	Diag << FixItHint::CreateReplacement(
780	Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
781
782	// That c_str() removal is now dealt with, so we don't need to do it again
783	ArgCStrRemovals.erase(position: CStrRemovalMatch);
784	} else
785	Diag << tooling::fixit::createReplacement(Destination: *Args[ValueArgIndex - ArgCount],
786	Source: Args[ValueArgIndex], Context: Context);
787
788	// Now shift down the field width and precision (if either are present) to
789	// accommodate it.
790	for (size_t Offset = `0`; Offset < ArgCount; ++Offset)
791	Diag << tooling::fixit::createReplacement(
792	Destination: Args[ValueArgIndex - Offset], Source: Args[ValueArgIndex - Offset - `1`],
793	Context: *Context);
794
795	// Now we need to modify the ArgFix index too so that we fix the right
796	// argument. We don't need to care about the width and precision indices
797	// since they never need fixing.
798	for (auto &ArgFix : ArgFixes) {
799	if (ArgFix.ArgIndex == ValueArgIndex)
800	ArgFix.ArgIndex = ValueArgIndex - ArgCount;
801	}
802	}
803
804	for (const auto &[ArgIndex, Replacement] : ArgFixes) {
805	SourceLocation AfterOtherSide =
806	Lexer::findNextToken(Loc: Args[ArgIndex]->getEndLoc(), SM, LangOpts)
807	->getLocation();
808
809	Diag << FixItHint::CreateInsertion(InsertionLoc: Args[ArgIndex]->getBeginLoc(),
810	Code: Replacement, BeforePreviousInsertions: true)
811	<< FixItHint::CreateInsertion(InsertionLoc: AfterOtherSide, Code: ")", BeforePreviousInsertions: true);
812	}
813
814	for (const auto &Match : ArgCStrRemovals) {
815	const auto *Call = Match.getNodeAs<CallExpr>(ID: "call");
816	const std::string ArgText = withoutCStrReplacement(CStrRemovalMatch: Match, Context&: *Context);
817	if (!ArgText.empty())
818	Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
819	}
820	}
821	} // namespace clang::tidy::utils
822

Provided by KDAB

Definitions

source code of clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp