1 | //===--- FormatStringConverter.cpp - clang-tidy----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// Implementation of the FormatStringConverter class which is used to convert |
11 | /// printf format strings to C++ std::formatter format strings. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "FormatStringConverter.h" |
16 | #include "../utils/FixItHintUtils.h" |
17 | #include "clang/AST/Expr.h" |
18 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
19 | #include "clang/Basic/LangOptions.h" |
20 | #include "clang/Lex/Lexer.h" |
21 | #include "clang/Tooling/FixIt.h" |
22 | #include "llvm/ADT/StringExtras.h" |
23 | #include "llvm/Support/Debug.h" |
24 | |
25 | using namespace clang::ast_matchers; |
26 | using namespace clang::analyze_printf; |
27 | |
28 | namespace clang::tidy::utils { |
29 | using clang::analyze_format_string::ConversionSpecifier; |
30 | |
31 | /// Is the passed type the actual "char" type, whether that be signed or |
32 | /// unsigned, rather than explicit signed char or unsigned char types. |
33 | static bool isRealCharType(const clang::QualType &Ty) { |
34 | using namespace clang; |
35 | const Type *DesugaredType = Ty->getUnqualifiedDesugaredType(); |
36 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: DesugaredType)) |
37 | return (BT->getKind() == BuiltinType::Char_U || |
38 | BT->getKind() == BuiltinType::Char_S); |
39 | return false; |
40 | } |
41 | |
42 | /// If possible, return the text name of the signed type that corresponds to the |
43 | /// passed integer type. If the passed type is already signed then its name is |
44 | /// just returned. Only supports BuiltinTypes. |
45 | static std::optional<std::string> |
46 | getCorrespondingSignedTypeName(const clang::QualType &QT) { |
47 | using namespace clang; |
48 | const auto UQT = QT.getUnqualifiedType(); |
49 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) { |
50 | switch (BT->getKind()) { |
51 | case BuiltinType::UChar: |
52 | case BuiltinType::Char_U: |
53 | case BuiltinType::SChar: |
54 | case BuiltinType::Char_S: |
55 | return "signed char" ; |
56 | case BuiltinType::UShort: |
57 | case BuiltinType::Short: |
58 | return "short" ; |
59 | case BuiltinType::UInt: |
60 | case BuiltinType::Int: |
61 | return "int" ; |
62 | case BuiltinType::ULong: |
63 | case BuiltinType::Long: |
64 | return "long" ; |
65 | case BuiltinType::ULongLong: |
66 | case BuiltinType::LongLong: |
67 | return "long long" ; |
68 | default: |
69 | llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '" |
70 | << QT.getAsString() << "'\n" ; |
71 | return std::nullopt; |
72 | } |
73 | } |
74 | |
75 | // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only |
76 | // if the argument type does. |
77 | const std::string TypeName = UQT.getAsString(); |
78 | StringRef SimplifiedTypeName{TypeName}; |
79 | const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::" ); |
80 | const StringRef Prefix = InStd ? "std::" : "" ; |
81 | |
82 | if (SimplifiedTypeName.starts_with(Prefix: "uint" ) && |
83 | SimplifiedTypeName.ends_with(Suffix: "_t" )) |
84 | return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str(); |
85 | |
86 | if (SimplifiedTypeName == "size_t" ) |
87 | return (Twine(Prefix) + "ssize_t" ).str(); |
88 | |
89 | llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '" |
90 | << UQT.getAsString() << "'\n" ; |
91 | return std::nullopt; |
92 | } |
93 | |
94 | /// If possible, return the text name of the unsigned type that corresponds to |
95 | /// the passed integer type. If the passed type is already unsigned then its |
96 | /// name is just returned. Only supports BuiltinTypes. |
97 | static std::optional<std::string> |
98 | getCorrespondingUnsignedTypeName(const clang::QualType &QT) { |
99 | using namespace clang; |
100 | const auto UQT = QT.getUnqualifiedType(); |
101 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) { |
102 | switch (BT->getKind()) { |
103 | case BuiltinType::SChar: |
104 | case BuiltinType::Char_S: |
105 | case BuiltinType::UChar: |
106 | case BuiltinType::Char_U: |
107 | return "unsigned char" ; |
108 | case BuiltinType::Short: |
109 | case BuiltinType::UShort: |
110 | return "unsigned short" ; |
111 | case BuiltinType::Int: |
112 | case BuiltinType::UInt: |
113 | return "unsigned int" ; |
114 | case BuiltinType::Long: |
115 | case BuiltinType::ULong: |
116 | return "unsigned long" ; |
117 | case BuiltinType::LongLong: |
118 | case BuiltinType::ULongLong: |
119 | return "unsigned long long" ; |
120 | default: |
121 | llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '" |
122 | << UQT.getAsString() << "'\n" ; |
123 | return std::nullopt; |
124 | } |
125 | } |
126 | |
127 | // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only |
128 | // if the argument type does. |
129 | const std::string TypeName = UQT.getAsString(); |
130 | StringRef SimplifiedTypeName{TypeName}; |
131 | const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::" ); |
132 | const StringRef Prefix = InStd ? "std::" : "" ; |
133 | |
134 | if (SimplifiedTypeName.starts_with(Prefix: "int" ) && |
135 | SimplifiedTypeName.ends_with(Suffix: "_t" )) |
136 | return (Twine(Prefix) + "u" + SimplifiedTypeName).str(); |
137 | |
138 | if (SimplifiedTypeName == "ssize_t" ) |
139 | return (Twine(Prefix) + "size_t" ).str(); |
140 | if (SimplifiedTypeName == "ptrdiff_t" ) |
141 | return (Twine(Prefix) + "size_t" ).str(); |
142 | |
143 | llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '" |
144 | << UQT.getAsString() << "'\n" ; |
145 | return std::nullopt; |
146 | } |
147 | |
148 | static std::optional<std::string> |
149 | castTypeForArgument(ConversionSpecifier::Kind ArgKind, |
150 | const clang::QualType &QT) { |
151 | if (ArgKind == ConversionSpecifier::Kind::uArg) |
152 | return getCorrespondingUnsignedTypeName(QT); |
153 | return getCorrespondingSignedTypeName(QT); |
154 | } |
155 | |
156 | static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, |
157 | const clang::QualType &ArgType) { |
158 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: ArgType)) { |
159 | // Unadorned char never matches any expected signedness since it |
160 | // could be signed or unsigned. |
161 | const auto ArgTypeKind = BT->getKind(); |
162 | if (ArgTypeKind == BuiltinType::Char_U || |
163 | ArgTypeKind == BuiltinType::Char_S) |
164 | return false; |
165 | } |
166 | |
167 | if (ArgKind == ConversionSpecifier::Kind::uArg) |
168 | return ArgType->isUnsignedIntegerType(); |
169 | return ArgType->isSignedIntegerType(); |
170 | } |
171 | |
172 | namespace { |
173 | AST_MATCHER(clang::QualType, isRealChar) { |
174 | return clang::tidy::utils::isRealCharType(Ty: Node); |
175 | } |
176 | } // namespace |
177 | |
178 | static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) { |
179 | /// For printf-style functions, the signedness of the type printed is |
180 | /// indicated by the corresponding type in the format string. |
181 | /// std::print will determine the signedness from the type of the |
182 | /// argument. This means that it is necessary to generate a cast in |
183 | /// StrictMode to ensure that the exact behaviour is maintained. |
184 | /// However, for templated functions like absl::PrintF and |
185 | /// fmt::printf, the signedness of the type printed is also taken from |
186 | /// the actual argument like std::print, so such casts are never |
187 | /// necessary. printf-style functions are variadic, whereas templated |
188 | /// ones aren't, so we can use that to distinguish between the two |
189 | /// cases. |
190 | if (StrictMode) { |
191 | const FunctionDecl *FuncDecl = Call->getDirectCallee(); |
192 | assert(FuncDecl); |
193 | return FuncDecl->isVariadic(); |
194 | } |
195 | return false; |
196 | } |
197 | |
198 | FormatStringConverter::FormatStringConverter(ASTContext *ContextIn, |
199 | const CallExpr *Call, |
200 | unsigned FormatArgOffset, |
201 | bool StrictMode, |
202 | const LangOptions &LO) |
203 | : Context(ContextIn), |
204 | CastMismatchedIntegerTypes(castMismatchedIntegerTypes(Call, StrictMode)), |
205 | Args(Call->getArgs()), NumArgs(Call->getNumArgs()), |
206 | ArgsOffset(FormatArgOffset + 1), LangOpts(LO) { |
207 | assert(ArgsOffset <= NumArgs); |
208 | FormatExpr = llvm::dyn_cast<StringLiteral>( |
209 | Val: Args[FormatArgOffset]->IgnoreImplicitAsWritten()); |
210 | assert(FormatExpr); |
211 | if (!FormatExpr->isOrdinary()) |
212 | return; // No wide string support yet |
213 | PrintfFormatString = FormatExpr->getString(); |
214 | |
215 | // Assume that the output will be approximately the same size as the input, |
216 | // but perhaps with a few escapes expanded. |
217 | const size_t EstimatedGrowth = 8; |
218 | StandardFormatString.reserve(res: PrintfFormatString.size() + EstimatedGrowth); |
219 | StandardFormatString.push_back(c: '\"'); |
220 | |
221 | const bool IsFreeBsdkPrintf = false; |
222 | |
223 | using clang::analyze_format_string::ParsePrintfString; |
224 | ParsePrintfString(H&: *this, beg: PrintfFormatString.data(), |
225 | end: PrintfFormatString.data() + PrintfFormatString.size(), |
226 | LO: LangOpts, Target: Context->getTargetInfo(), isFreeBSDKPrintf: IsFreeBsdkPrintf); |
227 | finalizeFormatText(); |
228 | } |
229 | |
230 | void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS, |
231 | std::string &FormatSpec) { |
232 | ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); |
233 | |
234 | // We only care about alignment if a field width is specified |
235 | if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) { |
236 | if (ArgKind == ConversionSpecifier::sArg) { |
237 | // Strings are left-aligned by default with std::format, so we only |
238 | // need to emit an alignment if this one needs to be right aligned. |
239 | if (!FS.isLeftJustified()) |
240 | FormatSpec.push_back(c: '>'); |
241 | } else { |
242 | // Numbers are right-aligned by default with std::format, so we only |
243 | // need to emit an alignment if this one needs to be left aligned. |
244 | if (FS.isLeftJustified()) |
245 | FormatSpec.push_back(c: '<'); |
246 | } |
247 | } |
248 | } |
249 | |
250 | void FormatStringConverter::emitSign(const PrintfSpecifier &FS, |
251 | std::string &FormatSpec) { |
252 | const ConversionSpecifier Spec = FS.getConversionSpecifier(); |
253 | |
254 | // Ignore on something that isn't numeric. For printf it's would be a |
255 | // compile-time warning but ignored at runtime, but for std::format it |
256 | // ought to be a compile-time error. |
257 | if (Spec.isAnyIntArg() || Spec.isDoubleArg()) { |
258 | // + is preferred to ' ' |
259 | if (FS.hasPlusPrefix()) |
260 | FormatSpec.push_back(c: '+'); |
261 | else if (FS.hasSpacePrefix()) |
262 | FormatSpec.push_back(c: ' '); |
263 | } |
264 | } |
265 | |
266 | void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS, |
267 | std::string &FormatSpec) { |
268 | if (FS.hasAlternativeForm()) { |
269 | switch (FS.getConversionSpecifier().getKind()) { |
270 | case ConversionSpecifier::Kind::aArg: |
271 | case ConversionSpecifier::Kind::AArg: |
272 | case ConversionSpecifier::Kind::eArg: |
273 | case ConversionSpecifier::Kind::EArg: |
274 | case ConversionSpecifier::Kind::fArg: |
275 | case ConversionSpecifier::Kind::FArg: |
276 | case ConversionSpecifier::Kind::gArg: |
277 | case ConversionSpecifier::Kind::GArg: |
278 | case ConversionSpecifier::Kind::xArg: |
279 | case ConversionSpecifier::Kind::XArg: |
280 | case ConversionSpecifier::Kind::oArg: |
281 | FormatSpec.push_back(c: '#'); |
282 | break; |
283 | default: |
284 | // Alternative forms don't exist for other argument kinds |
285 | break; |
286 | } |
287 | } |
288 | } |
289 | |
290 | void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS, |
291 | std::string &FormatSpec) { |
292 | { |
293 | const OptionalAmount FieldWidth = FS.getFieldWidth(); |
294 | switch (FieldWidth.getHowSpecified()) { |
295 | case OptionalAmount::NotSpecified: |
296 | break; |
297 | case OptionalAmount::Constant: |
298 | FormatSpec.append(str: llvm::utostr(X: FieldWidth.getConstantAmount())); |
299 | break; |
300 | case OptionalAmount::Arg: |
301 | FormatSpec.push_back(c: '{'); |
302 | if (FieldWidth.usesPositionalArg()) { |
303 | // std::format argument identifiers are zero-based, whereas printf |
304 | // ones are one based. |
305 | assert(FieldWidth.getPositionalArgIndex() > 0U); |
306 | FormatSpec.append(str: llvm::utostr(X: FieldWidth.getPositionalArgIndex() - 1)); |
307 | } |
308 | FormatSpec.push_back(c: '}'); |
309 | break; |
310 | case OptionalAmount::Invalid: |
311 | break; |
312 | } |
313 | } |
314 | } |
315 | |
316 | void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS, |
317 | std::string &FormatSpec) { |
318 | const OptionalAmount FieldPrecision = FS.getPrecision(); |
319 | switch (FieldPrecision.getHowSpecified()) { |
320 | case OptionalAmount::NotSpecified: |
321 | break; |
322 | case OptionalAmount::Constant: |
323 | FormatSpec.push_back(c: '.'); |
324 | FormatSpec.append(str: llvm::utostr(X: FieldPrecision.getConstantAmount())); |
325 | break; |
326 | case OptionalAmount::Arg: |
327 | FormatSpec.push_back(c: '.'); |
328 | FormatSpec.push_back(c: '{'); |
329 | if (FieldPrecision.usesPositionalArg()) { |
330 | // std::format argument identifiers are zero-based, whereas printf |
331 | // ones are one based. |
332 | assert(FieldPrecision.getPositionalArgIndex() > 0U); |
333 | FormatSpec.append( |
334 | str: llvm::utostr(X: FieldPrecision.getPositionalArgIndex() - 1)); |
335 | } |
336 | FormatSpec.push_back(c: '}'); |
337 | break; |
338 | case OptionalAmount::Invalid: |
339 | break; |
340 | } |
341 | } |
342 | |
343 | void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) { |
344 | unsigned ArgCount = 0; |
345 | const OptionalAmount FieldWidth = FS.getFieldWidth(); |
346 | const OptionalAmount FieldPrecision = FS.getPrecision(); |
347 | |
348 | if (FieldWidth.getHowSpecified() == OptionalAmount::Arg && |
349 | !FieldWidth.usesPositionalArg()) |
350 | ++ArgCount; |
351 | if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg && |
352 | !FieldPrecision.usesPositionalArg()) |
353 | ++ArgCount; |
354 | |
355 | if (ArgCount) |
356 | ArgRotates.emplace_back(args: FS.getArgIndex() + ArgsOffset, args&: ArgCount); |
357 | } |
358 | |
359 | void FormatStringConverter::emitStringArgument(unsigned ArgIndex, |
360 | const Expr *Arg) { |
361 | // If the argument is the result of a call to std::string::c_str() or |
362 | // data() with a return type of char then we can remove that call and |
363 | // pass the std::string directly. We don't want to do so if the return |
364 | // type is not a char pointer (though it's unlikely that such code would |
365 | // compile without warnings anyway.) See RedundantStringCStrCheck. |
366 | |
367 | if (!StringCStrCallExprMatcher) { |
368 | // Lazily create the matcher |
369 | const auto StringDecl = type(hasUnqualifiedDesugaredType(InnerMatcher: recordType( |
370 | hasDeclaration(InnerMatcher: cxxRecordDecl(hasName(Name: "::std::basic_string" )))))); |
371 | const auto StringExpr = expr( |
372 | anyOf(hasType(InnerMatcher: StringDecl), hasType(InnerMatcher: qualType(pointsTo(InnerMatcher: StringDecl))))); |
373 | |
374 | StringCStrCallExprMatcher = |
375 | cxxMemberCallExpr( |
376 | on(InnerMatcher: StringExpr.bind(ID: "arg" )), callee(InnerMatcher: memberExpr().bind(ID: "member" )), |
377 | callee(InnerMatcher: cxxMethodDecl(hasAnyName("c_str" , "data" ), |
378 | returns(InnerMatcher: pointerType(pointee(isRealChar())))))) |
379 | .bind(ID: "call" ); |
380 | } |
381 | |
382 | auto CStrMatches = match(Matcher: *StringCStrCallExprMatcher, Node: *Arg, Context&: *Context); |
383 | if (CStrMatches.size() == 1) |
384 | ArgCStrRemovals.push_back(x: CStrMatches.front()); |
385 | else if (Arg->getType()->isPointerType()) { |
386 | const QualType Pointee = Arg->getType()->getPointeeType(); |
387 | // printf is happy to print signed char and unsigned char strings, but |
388 | // std::format only likes char strings. |
389 | if (Pointee->isCharType() && !isRealCharType(Ty: Pointee)) |
390 | ArgFixes.emplace_back(args&: ArgIndex, args: "reinterpret_cast<const char *>(" ); |
391 | } |
392 | } |
393 | |
394 | bool FormatStringConverter::emitIntegerArgument( |
395 | ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex, |
396 | std::string &FormatSpec) { |
397 | const clang::QualType &ArgType = Arg->getType(); |
398 | if (ArgType->isBooleanType()) { |
399 | // std::format will print bool as either "true" or "false" by default, |
400 | // but printf prints them as "0" or "1". Be compatible with printf by |
401 | // requesting decimal output. |
402 | FormatSpec.push_back(c: 'd'); |
403 | } else if (ArgType->isEnumeralType()) { |
404 | // std::format will try to find a specialization to print the enum |
405 | // (and probably fail), whereas printf would have just expected it to |
406 | // be passed as its underlying type. However, printf will have forced |
407 | // the signedness based on the format string, so we need to do the |
408 | // same. |
409 | if (const auto *ET = ArgType->getAs<EnumType>()) { |
410 | if (const std::optional<std::string> MaybeCastType = |
411 | castTypeForArgument(ArgKind, QT: ET->getDecl()->getIntegerType())) |
412 | ArgFixes.emplace_back( |
413 | args&: ArgIndex, args: (Twine("static_cast<" ) + *MaybeCastType + ">(" ).str()); |
414 | else |
415 | return conversionNotPossible( |
416 | Reason: (Twine("argument " ) + Twine(ArgIndex) + " has unexpected enum type" ) |
417 | .str()); |
418 | } |
419 | } else if (CastMismatchedIntegerTypes && |
420 | !isMatchingSignedness(ArgKind, ArgType)) { |
421 | // printf will happily print an unsigned type as signed if told to. |
422 | // Even -Wformat doesn't warn for this. std::format will format as |
423 | // unsigned unless we cast it. |
424 | if (const std::optional<std::string> MaybeCastType = |
425 | castTypeForArgument(ArgKind, QT: ArgType)) |
426 | ArgFixes.emplace_back( |
427 | args&: ArgIndex, args: (Twine("static_cast<" ) + *MaybeCastType + ">(" ).str()); |
428 | else |
429 | return conversionNotPossible( |
430 | Reason: (Twine("argument " ) + Twine(ArgIndex) + " cannot be cast to " + |
431 | Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned" |
432 | : "signed" ) + |
433 | " integer type to match format" |
434 | " specifier and StrictMode is enabled" ) |
435 | .str()); |
436 | } else if (isRealCharType(Ty: ArgType) || !ArgType->isIntegerType()) { |
437 | // Only specify integer if the argument is of a different type |
438 | FormatSpec.push_back(c: 'd'); |
439 | } |
440 | return true; |
441 | } |
442 | |
443 | /// Append the corresponding standard format string type fragment to FormatSpec, |
444 | /// and store any argument fixes for later application. |
445 | /// @returns true on success, false on failure |
446 | bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg, |
447 | std::string &FormatSpec) { |
448 | ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); |
449 | switch (ArgKind) { |
450 | case ConversionSpecifier::Kind::sArg: |
451 | emitStringArgument(ArgIndex: FS.getArgIndex() + ArgsOffset, Arg); |
452 | break; |
453 | case ConversionSpecifier::Kind::cArg: |
454 | // The type must be "c" to get a character unless the type is exactly |
455 | // char (whether that be signed or unsigned for the target.) |
456 | if (!isRealCharType(Ty: Arg->getType())) |
457 | FormatSpec.push_back(c: 'c'); |
458 | break; |
459 | case ConversionSpecifier::Kind::dArg: |
460 | case ConversionSpecifier::Kind::iArg: |
461 | case ConversionSpecifier::Kind::uArg: |
462 | if (!emitIntegerArgument(ArgKind, Arg, ArgIndex: FS.getArgIndex() + ArgsOffset, |
463 | FormatSpec)) |
464 | return false; |
465 | break; |
466 | case ConversionSpecifier::Kind::pArg: { |
467 | const clang::QualType &ArgType = Arg->getType(); |
468 | // std::format knows how to format void pointers and nullptrs |
469 | if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType()) |
470 | ArgFixes.emplace_back(args: FS.getArgIndex() + ArgsOffset, |
471 | args: "static_cast<const void *>(" ); |
472 | break; |
473 | } |
474 | case ConversionSpecifier::Kind::xArg: |
475 | FormatSpec.push_back(c: 'x'); |
476 | break; |
477 | case ConversionSpecifier::Kind::XArg: |
478 | FormatSpec.push_back(c: 'X'); |
479 | break; |
480 | case ConversionSpecifier::Kind::oArg: |
481 | FormatSpec.push_back(c: 'o'); |
482 | break; |
483 | case ConversionSpecifier::Kind::aArg: |
484 | FormatSpec.push_back(c: 'a'); |
485 | break; |
486 | case ConversionSpecifier::Kind::AArg: |
487 | FormatSpec.push_back(c: 'A'); |
488 | break; |
489 | case ConversionSpecifier::Kind::eArg: |
490 | FormatSpec.push_back(c: 'e'); |
491 | break; |
492 | case ConversionSpecifier::Kind::EArg: |
493 | FormatSpec.push_back(c: 'E'); |
494 | break; |
495 | case ConversionSpecifier::Kind::fArg: |
496 | FormatSpec.push_back(c: 'f'); |
497 | break; |
498 | case ConversionSpecifier::Kind::FArg: |
499 | FormatSpec.push_back(c: 'F'); |
500 | break; |
501 | case ConversionSpecifier::Kind::gArg: |
502 | FormatSpec.push_back(c: 'g'); |
503 | break; |
504 | case ConversionSpecifier::Kind::GArg: |
505 | FormatSpec.push_back(c: 'G'); |
506 | break; |
507 | default: |
508 | // Something we don't understand |
509 | return conversionNotPossible(Reason: (Twine("argument " ) + |
510 | Twine(FS.getArgIndex() + ArgsOffset) + |
511 | " has an unsupported format specifier" ) |
512 | .str()); |
513 | } |
514 | |
515 | return true; |
516 | } |
517 | |
518 | /// Append the standard format string equivalent of the passed PrintfSpecifier |
519 | /// to StandardFormatString and store any argument fixes for later application. |
520 | /// @returns true on success, false on failure |
521 | bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS, |
522 | const Expr *Arg, |
523 | std::string &StandardFormatString) { |
524 | // The specifier must have an associated argument |
525 | assert(FS.consumesDataArgument()); |
526 | |
527 | StandardFormatString.push_back(c: '{'); |
528 | |
529 | if (FS.usesPositionalArg()) { |
530 | // std::format argument identifiers are zero-based, whereas printf ones |
531 | // are one based. |
532 | assert(FS.getPositionalArgIndex() > 0U); |
533 | StandardFormatString.append(str: llvm::utostr(X: FS.getPositionalArgIndex() - 1)); |
534 | } |
535 | |
536 | // std::format format argument parts to potentially emit: |
537 | // [[fill]align][sign]["#"]["0"][width]["."precision][type] |
538 | std::string FormatSpec; |
539 | |
540 | // printf doesn't support specifying the fill character - it's always a |
541 | // space, so we never need to generate one. |
542 | |
543 | emitAlignment(FS, FormatSpec); |
544 | emitSign(FS, FormatSpec); |
545 | emitAlternativeForm(FS, FormatSpec); |
546 | |
547 | if (FS.hasLeadingZeros()) |
548 | FormatSpec.push_back(c: '0'); |
549 | |
550 | emitFieldWidth(FS, FormatSpec); |
551 | emitPrecision(FS, FormatSpec); |
552 | maybeRotateArguments(FS); |
553 | |
554 | if (!emitType(FS, Arg, FormatSpec)) |
555 | return false; |
556 | |
557 | if (!FormatSpec.empty()) { |
558 | StandardFormatString.push_back(c: ':'); |
559 | StandardFormatString.append(str: FormatSpec); |
560 | } |
561 | |
562 | StandardFormatString.push_back(c: '}'); |
563 | return true; |
564 | } |
565 | |
566 | /// Called for each format specifier by ParsePrintfString. |
567 | bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS, |
568 | const char *StartSpecifier, |
569 | unsigned SpecifierLen, |
570 | const TargetInfo &Target) { |
571 | |
572 | const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data(); |
573 | assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size()); |
574 | |
575 | // Everything before the specifier needs copying verbatim |
576 | assert(StartSpecifierPos >= PrintfFormatStringPos); |
577 | |
578 | appendFormatText(Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, |
579 | StartSpecifierPos - PrintfFormatStringPos)); |
580 | |
581 | const ConversionSpecifier::Kind ArgKind = |
582 | FS.getConversionSpecifier().getKind(); |
583 | |
584 | // Skip over specifier |
585 | PrintfFormatStringPos = StartSpecifierPos + SpecifierLen; |
586 | assert(PrintfFormatStringPos <= PrintfFormatString.size()); |
587 | |
588 | FormatStringNeededRewriting = true; |
589 | |
590 | if (ArgKind == ConversionSpecifier::Kind::nArg) { |
591 | // std::print doesn't do the equivalent of %n |
592 | return conversionNotPossible(Reason: "'%n' is not supported in format string" ); |
593 | } |
594 | |
595 | if (ArgKind == ConversionSpecifier::Kind::PrintErrno) { |
596 | // std::print doesn't support %m. In theory we could insert a |
597 | // strerror(errno) parameter (assuming that libc has a thread-safe |
598 | // implementation, which glibc does), but that would require keeping track |
599 | // of the input and output parameter indices for position arguments too. |
600 | return conversionNotPossible(Reason: "'%m' is not supported in format string" ); |
601 | } |
602 | |
603 | if (ArgKind == ConversionSpecifier::PercentArg) { |
604 | StandardFormatString.push_back(c: '%'); |
605 | return true; |
606 | } |
607 | |
608 | const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset; |
609 | if (ArgIndex >= NumArgs) { |
610 | // Argument index out of range. Give up. |
611 | return conversionNotPossible( |
612 | Reason: (Twine("argument index " ) + Twine(ArgIndex) + " is out of range" ) |
613 | .str()); |
614 | } |
615 | |
616 | return convertArgument(FS, Arg: Args[ArgIndex]->IgnoreImplicitAsWritten(), |
617 | StandardFormatString); |
618 | } |
619 | |
620 | /// Called at the very end just before applying fixes to capture the last part |
621 | /// of the format string. |
622 | void FormatStringConverter::finalizeFormatText() { |
623 | appendFormatText( |
624 | Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, |
625 | PrintfFormatString.size() - PrintfFormatStringPos)); |
626 | PrintfFormatStringPos = PrintfFormatString.size(); |
627 | |
628 | // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n") |
629 | // than to std::println("Hello\r"); |
630 | if (StringRef(StandardFormatString).ends_with(Suffix: "\\n" ) && |
631 | !StringRef(StandardFormatString).ends_with(Suffix: "\\\\n" ) && |
632 | !StringRef(StandardFormatString).ends_with(Suffix: "\\r\\n" )) { |
633 | UsePrintNewlineFunction = true; |
634 | FormatStringNeededRewriting = true; |
635 | StandardFormatString.erase(first: StandardFormatString.end() - 2, |
636 | last: StandardFormatString.end()); |
637 | } |
638 | |
639 | StandardFormatString.push_back(c: '\"'); |
640 | } |
641 | |
642 | /// Append literal parts of the format text, reinstating escapes as required. |
643 | void FormatStringConverter::appendFormatText(const StringRef Text) { |
644 | for (const char Ch : Text) { |
645 | if (Ch == '\a') |
646 | StandardFormatString += "\\a" ; |
647 | else if (Ch == '\b') |
648 | StandardFormatString += "\\b" ; |
649 | else if (Ch == '\f') |
650 | StandardFormatString += "\\f" ; |
651 | else if (Ch == '\n') |
652 | StandardFormatString += "\\n" ; |
653 | else if (Ch == '\r') |
654 | StandardFormatString += "\\r" ; |
655 | else if (Ch == '\t') |
656 | StandardFormatString += "\\t" ; |
657 | else if (Ch == '\v') |
658 | StandardFormatString += "\\v" ; |
659 | else if (Ch == '\"') |
660 | StandardFormatString += "\\\"" ; |
661 | else if (Ch == '\\') |
662 | StandardFormatString += "\\\\" ; |
663 | else if (Ch == '{') { |
664 | StandardFormatString += "{{" ; |
665 | FormatStringNeededRewriting = true; |
666 | } else if (Ch == '}') { |
667 | StandardFormatString += "}}" ; |
668 | FormatStringNeededRewriting = true; |
669 | } else if (Ch < 32) { |
670 | StandardFormatString += "\\x" ; |
671 | StandardFormatString += llvm::hexdigit(X: Ch >> 4, LowerCase: true); |
672 | StandardFormatString += llvm::hexdigit(X: Ch & 0xf, LowerCase: true); |
673 | } else |
674 | StandardFormatString += Ch; |
675 | } |
676 | } |
677 | |
678 | static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, |
679 | ASTContext &Context) { |
680 | const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>(ID: "arg" ); |
681 | const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>(ID: "member" ); |
682 | const bool Arrow = Member->isArrow(); |
683 | return Arrow ? utils::fixit::formatDereference(ExprNode: *Arg, Context) |
684 | : tooling::fixit::getText(Node: *Arg, Context).str(); |
685 | } |
686 | |
687 | /// Called by the check when it is ready to apply the fixes. |
688 | void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag, |
689 | SourceManager &SM) { |
690 | if (FormatStringNeededRewriting) { |
691 | Diag << FixItHint::CreateReplacement( |
692 | RemoveRange: CharSourceRange::getTokenRange(B: FormatExpr->getBeginLoc(), |
693 | E: FormatExpr->getEndLoc()), |
694 | Code: StandardFormatString); |
695 | } |
696 | |
697 | // ArgCount is one less than the number of arguments to be rotated. |
698 | for (auto [ValueArgIndex, ArgCount] : ArgRotates) { |
699 | assert(ValueArgIndex < NumArgs); |
700 | assert(ValueArgIndex > ArgCount); |
701 | |
702 | // First move the value argument to the right place. But if there's a |
703 | // pending c_str() removal then we must do that at the same time. |
704 | if (const auto CStrRemovalMatch = |
705 | std::find_if(first: ArgCStrRemovals.cbegin(), last: ArgCStrRemovals.cend(), |
706 | pred: [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()]( |
707 | const BoundNodes &Match) { |
708 | // This c_str() removal corresponds to the argument |
709 | // being moved if they start at the same location. |
710 | const Expr *CStrArg = Match.getNodeAs<Expr>(ID: "arg" ); |
711 | return ArgStartPos == CStrArg->getBeginLoc(); |
712 | }); |
713 | CStrRemovalMatch != ArgCStrRemovals.end()) { |
714 | const std::string ArgText = |
715 | withoutCStrReplacement(CStrRemovalMatch: *CStrRemovalMatch, Context&: *Context); |
716 | assert(!ArgText.empty()); |
717 | |
718 | Diag << FixItHint::CreateReplacement( |
719 | Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText); |
720 | |
721 | // That c_str() removal is now dealt with, so we don't need to do it again |
722 | ArgCStrRemovals.erase(position: CStrRemovalMatch); |
723 | } else |
724 | Diag << tooling::fixit::createReplacement(Destination: *Args[ValueArgIndex - ArgCount], |
725 | Source: *Args[ValueArgIndex], Context: *Context); |
726 | |
727 | // Now shift down the field width and precision (if either are present) to |
728 | // accommodate it. |
729 | for (size_t Offset = 0; Offset < ArgCount; ++Offset) |
730 | Diag << tooling::fixit::createReplacement( |
731 | Destination: *Args[ValueArgIndex - Offset], Source: *Args[ValueArgIndex - Offset - 1], |
732 | Context: *Context); |
733 | |
734 | // Now we need to modify the ArgFix index too so that we fix the right |
735 | // argument. We don't need to care about the width and precision indices |
736 | // since they never need fixing. |
737 | for (auto &ArgFix : ArgFixes) { |
738 | if (ArgFix.ArgIndex == ValueArgIndex) |
739 | ArgFix.ArgIndex = ValueArgIndex - ArgCount; |
740 | } |
741 | } |
742 | |
743 | for (const auto &[ArgIndex, Replacement] : ArgFixes) { |
744 | SourceLocation AfterOtherSide = |
745 | Lexer::findNextToken(Loc: Args[ArgIndex]->getEndLoc(), SM, LangOpts) |
746 | ->getLocation(); |
747 | |
748 | Diag << FixItHint::CreateInsertion(InsertionLoc: Args[ArgIndex]->getBeginLoc(), |
749 | Code: Replacement, BeforePreviousInsertions: true) |
750 | << FixItHint::CreateInsertion(InsertionLoc: AfterOtherSide, Code: ")" , BeforePreviousInsertions: true); |
751 | } |
752 | |
753 | for (const auto &Match : ArgCStrRemovals) { |
754 | const auto *Call = Match.getNodeAs<CallExpr>(ID: "call" ); |
755 | const std::string ArgText = withoutCStrReplacement(CStrRemovalMatch: Match, Context&: *Context); |
756 | if (!ArgText.empty()) |
757 | Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText); |
758 | } |
759 | } |
760 | } // namespace clang::tidy::utils |
761 | |