1 | //===--- FormatStringConverter.cpp - clang-tidy----------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// Implementation of the FormatStringConverter class which is used to convert |
11 | /// printf format strings to C++ std::formatter format strings. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "FormatStringConverter.h" |
16 | #include "../utils/FixItHintUtils.h" |
17 | #include "clang/AST/Expr.h" |
18 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
19 | #include "clang/Basic/LangOptions.h" |
20 | #include "clang/Lex/Lexer.h" |
21 | #include "clang/Lex/Preprocessor.h" |
22 | #include "clang/Tooling/FixIt.h" |
23 | #include "llvm/ADT/StringExtras.h" |
24 | #include "llvm/Support/Debug.h" |
25 | |
26 | using namespace clang::ast_matchers; |
27 | using namespace clang::analyze_printf; |
28 | |
29 | namespace clang::tidy::utils { |
30 | using clang::analyze_format_string::ConversionSpecifier; |
31 | |
32 | /// Is the passed type the actual "char" type, whether that be signed or |
33 | /// unsigned, rather than explicit signed char or unsigned char types. |
34 | static bool isRealCharType(const clang::QualType &Ty) { |
35 | using namespace clang; |
36 | const Type *DesugaredType = Ty->getUnqualifiedDesugaredType(); |
37 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: DesugaredType)) |
38 | return (BT->getKind() == BuiltinType::Char_U || |
39 | BT->getKind() == BuiltinType::Char_S); |
40 | return false; |
41 | } |
42 | |
43 | /// If possible, return the text name of the signed type that corresponds to the |
44 | /// passed integer type. If the passed type is already signed then its name is |
45 | /// just returned. Only supports BuiltinTypes. |
46 | static std::optional<std::string> |
47 | getCorrespondingSignedTypeName(const clang::QualType &QT) { |
48 | using namespace clang; |
49 | const auto UQT = QT.getUnqualifiedType(); |
50 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) { |
51 | switch (BT->getKind()) { |
52 | case BuiltinType::UChar: |
53 | case BuiltinType::Char_U: |
54 | case BuiltinType::SChar: |
55 | case BuiltinType::Char_S: |
56 | return "signed char"; |
57 | case BuiltinType::UShort: |
58 | case BuiltinType::Short: |
59 | return "short"; |
60 | case BuiltinType::UInt: |
61 | case BuiltinType::Int: |
62 | return "int"; |
63 | case BuiltinType::ULong: |
64 | case BuiltinType::Long: |
65 | return "long"; |
66 | case BuiltinType::ULongLong: |
67 | case BuiltinType::LongLong: |
68 | return "long long"; |
69 | default: |
70 | llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '" |
71 | << QT.getAsString() << "'\n"; |
72 | return std::nullopt; |
73 | } |
74 | } |
75 | |
76 | // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only |
77 | // if the argument type does. |
78 | const std::string TypeName = UQT.getAsString(); |
79 | StringRef SimplifiedTypeName{TypeName}; |
80 | const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::"); |
81 | const StringRef Prefix = InStd ? "std::": ""; |
82 | |
83 | if (SimplifiedTypeName.starts_with(Prefix: "uint") && |
84 | SimplifiedTypeName.ends_with(Suffix: "_t")) |
85 | return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str(); |
86 | |
87 | if (SimplifiedTypeName == "size_t") |
88 | return (Twine(Prefix) + "ssize_t").str(); |
89 | |
90 | llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '" |
91 | << UQT.getAsString() << "'\n"; |
92 | return std::nullopt; |
93 | } |
94 | |
95 | /// If possible, return the text name of the unsigned type that corresponds to |
96 | /// the passed integer type. If the passed type is already unsigned then its |
97 | /// name is just returned. Only supports BuiltinTypes. |
98 | static std::optional<std::string> |
99 | getCorrespondingUnsignedTypeName(const clang::QualType &QT) { |
100 | using namespace clang; |
101 | const auto UQT = QT.getUnqualifiedType(); |
102 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) { |
103 | switch (BT->getKind()) { |
104 | case BuiltinType::SChar: |
105 | case BuiltinType::Char_S: |
106 | case BuiltinType::UChar: |
107 | case BuiltinType::Char_U: |
108 | return "unsigned char"; |
109 | case BuiltinType::Short: |
110 | case BuiltinType::UShort: |
111 | return "unsigned short"; |
112 | case BuiltinType::Int: |
113 | case BuiltinType::UInt: |
114 | return "unsigned int"; |
115 | case BuiltinType::Long: |
116 | case BuiltinType::ULong: |
117 | return "unsigned long"; |
118 | case BuiltinType::LongLong: |
119 | case BuiltinType::ULongLong: |
120 | return "unsigned long long"; |
121 | default: |
122 | llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '" |
123 | << UQT.getAsString() << "'\n"; |
124 | return std::nullopt; |
125 | } |
126 | } |
127 | |
128 | // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only |
129 | // if the argument type does. |
130 | const std::string TypeName = UQT.getAsString(); |
131 | StringRef SimplifiedTypeName{TypeName}; |
132 | const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::"); |
133 | const StringRef Prefix = InStd ? "std::": ""; |
134 | |
135 | if (SimplifiedTypeName.starts_with(Prefix: "int") && |
136 | SimplifiedTypeName.ends_with(Suffix: "_t")) |
137 | return (Twine(Prefix) + "u"+ SimplifiedTypeName).str(); |
138 | |
139 | if (SimplifiedTypeName == "ssize_t") |
140 | return (Twine(Prefix) + "size_t").str(); |
141 | if (SimplifiedTypeName == "ptrdiff_t") |
142 | return (Twine(Prefix) + "size_t").str(); |
143 | |
144 | llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '" |
145 | << UQT.getAsString() << "'\n"; |
146 | return std::nullopt; |
147 | } |
148 | |
149 | static std::optional<std::string> |
150 | castTypeForArgument(ConversionSpecifier::Kind ArgKind, |
151 | const clang::QualType &QT) { |
152 | if (ArgKind == ConversionSpecifier::Kind::uArg) |
153 | return getCorrespondingUnsignedTypeName(QT); |
154 | return getCorrespondingSignedTypeName(QT); |
155 | } |
156 | |
157 | static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, |
158 | const clang::QualType &ArgType) { |
159 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: ArgType)) { |
160 | // Unadorned char never matches any expected signedness since it |
161 | // could be signed or unsigned. |
162 | const auto ArgTypeKind = BT->getKind(); |
163 | if (ArgTypeKind == BuiltinType::Char_U || |
164 | ArgTypeKind == BuiltinType::Char_S) |
165 | return false; |
166 | } |
167 | |
168 | if (ArgKind == ConversionSpecifier::Kind::uArg) |
169 | return ArgType->isUnsignedIntegerType(); |
170 | return ArgType->isSignedIntegerType(); |
171 | } |
172 | |
173 | namespace { |
174 | AST_MATCHER(clang::QualType, isRealChar) { |
175 | return clang::tidy::utils::isRealCharType(Ty: Node); |
176 | } |
177 | } // namespace |
178 | |
179 | static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) { |
180 | /// For printf-style functions, the signedness of the type printed is |
181 | /// indicated by the corresponding type in the format string. |
182 | /// std::print will determine the signedness from the type of the |
183 | /// argument. This means that it is necessary to generate a cast in |
184 | /// StrictMode to ensure that the exact behaviour is maintained. |
185 | /// However, for templated functions like absl::PrintF and |
186 | /// fmt::printf, the signedness of the type printed is also taken from |
187 | /// the actual argument like std::print, so such casts are never |
188 | /// necessary. printf-style functions are variadic, whereas templated |
189 | /// ones aren't, so we can use that to distinguish between the two |
190 | /// cases. |
191 | if (StrictMode) { |
192 | const FunctionDecl *FuncDecl = Call->getDirectCallee(); |
193 | assert(FuncDecl); |
194 | return FuncDecl->isVariadic(); |
195 | } |
196 | return false; |
197 | } |
198 | |
199 | FormatStringConverter::FormatStringConverter( |
200 | ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset, |
201 | const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM, |
202 | Preprocessor &PP) |
203 | : Context(ContextIn), Config(ConfigIn), |
204 | CastMismatchedIntegerTypes( |
205 | castMismatchedIntegerTypes(Call, StrictMode: ConfigIn.StrictMode)), |
206 | Args(Call->getArgs()), NumArgs(Call->getNumArgs()), |
207 | ArgsOffset(FormatArgOffset + 1), LangOpts(LO) { |
208 | assert(ArgsOffset <= NumArgs); |
209 | FormatExpr = llvm::dyn_cast<StringLiteral>( |
210 | Val: Args[FormatArgOffset]->IgnoreImplicitAsWritten()); |
211 | |
212 | if (!FormatExpr || !FormatExpr->isOrdinary()) { |
213 | // Function must have a narrow string literal as its first argument. |
214 | conversionNotPossible(Reason: "first argument is not a narrow string literal"); |
215 | return; |
216 | } |
217 | |
218 | if (const std::optional<StringRef> MaybeMacroName = |
219 | formatStringContainsUnreplaceableMacro(CallExpr: Call, FormatExpr, SM, PP); |
220 | MaybeMacroName) { |
221 | conversionNotPossible( |
222 | Reason: ("format string contains unreplaceable macro '"+ *MaybeMacroName + "'") |
223 | .str()); |
224 | return; |
225 | } |
226 | |
227 | PrintfFormatString = FormatExpr->getString(); |
228 | |
229 | // Assume that the output will be approximately the same size as the input, |
230 | // but perhaps with a few escapes expanded. |
231 | const size_t EstimatedGrowth = 8; |
232 | StandardFormatString.reserve(res: PrintfFormatString.size() + EstimatedGrowth); |
233 | StandardFormatString.push_back(c: '\"'); |
234 | |
235 | const bool IsFreeBsdkPrintf = false; |
236 | |
237 | using clang::analyze_format_string::ParsePrintfString; |
238 | ParsePrintfString(H&: *this, beg: PrintfFormatString.data(), |
239 | end: PrintfFormatString.data() + PrintfFormatString.size(), |
240 | LO: LangOpts, Target: Context->getTargetInfo(), isFreeBSDKPrintf: IsFreeBsdkPrintf); |
241 | finalizeFormatText(); |
242 | } |
243 | |
244 | std::optional<StringRef> |
245 | FormatStringConverter::formatStringContainsUnreplaceableMacro( |
246 | const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM, |
247 | Preprocessor &PP) { |
248 | // If a macro invocation surrounds the entire call then we don't want that to |
249 | // inhibit conversion. The whole format string will appear to come from that |
250 | // macro, as will the function call. |
251 | std::optional<StringRef> MaybeSurroundingMacroName; |
252 | if (SourceLocation BeginCallLoc = Call->getBeginLoc(); |
253 | BeginCallLoc.isMacroID()) |
254 | MaybeSurroundingMacroName = |
255 | Lexer::getImmediateMacroName(Loc: BeginCallLoc, SM, LangOpts: PP.getLangOpts()); |
256 | |
257 | for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end(); |
258 | I != E; ++I) { |
259 | const SourceLocation &TokenLoc = *I; |
260 | if (TokenLoc.isMacroID()) { |
261 | const StringRef MacroName = |
262 | Lexer::getImmediateMacroName(Loc: TokenLoc, SM, LangOpts: PP.getLangOpts()); |
263 | |
264 | if (MaybeSurroundingMacroName != MacroName) { |
265 | // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes |
266 | // for types that change size so we must look for multiple prefixes. |
267 | if (!MacroName.starts_with(Prefix: "PRI") && !MacroName.starts_with(Prefix: "__PRI")) |
268 | return MacroName; |
269 | |
270 | const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(Loc: TokenLoc); |
271 | const OptionalFileEntryRef MaybeFileEntry = |
272 | SM.getFileEntryRefForID(FID: SM.getFileID(SpellingLoc: TokenSpellingLoc)); |
273 | if (!MaybeFileEntry) |
274 | return MacroName; |
275 | |
276 | HeaderSearch &HS = PP.getHeaderSearchInfo(); |
277 | // Check if the file is a system header |
278 | if (!isSystem(CK: HS.getFileDirFlavor(File: *MaybeFileEntry)) || |
279 | llvm::sys::path::filename(path: MaybeFileEntry->getName()) != |
280 | "inttypes.h") |
281 | return MacroName; |
282 | } |
283 | } |
284 | } |
285 | return std::nullopt; |
286 | } |
287 | |
288 | void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS, |
289 | std::string &FormatSpec) { |
290 | ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); |
291 | |
292 | // We only care about alignment if a field width is specified |
293 | if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) { |
294 | if (ArgKind == ConversionSpecifier::sArg) { |
295 | // Strings are left-aligned by default with std::format, so we only |
296 | // need to emit an alignment if this one needs to be right aligned. |
297 | if (!FS.isLeftJustified()) |
298 | FormatSpec.push_back(c: '>'); |
299 | } else { |
300 | // Numbers are right-aligned by default with std::format, so we only |
301 | // need to emit an alignment if this one needs to be left aligned. |
302 | if (FS.isLeftJustified()) |
303 | FormatSpec.push_back(c: '<'); |
304 | } |
305 | } |
306 | } |
307 | |
308 | void FormatStringConverter::emitSign(const PrintfSpecifier &FS, |
309 | std::string &FormatSpec) { |
310 | const ConversionSpecifier Spec = FS.getConversionSpecifier(); |
311 | |
312 | // Ignore on something that isn't numeric. For printf it's would be a |
313 | // compile-time warning but ignored at runtime, but for std::format it |
314 | // ought to be a compile-time error. |
315 | if (Spec.isAnyIntArg() || Spec.isDoubleArg()) { |
316 | // + is preferred to ' ' |
317 | if (FS.hasPlusPrefix()) |
318 | FormatSpec.push_back(c: '+'); |
319 | else if (FS.hasSpacePrefix()) |
320 | FormatSpec.push_back(c: ' '); |
321 | } |
322 | } |
323 | |
324 | void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS, |
325 | std::string &FormatSpec) { |
326 | if (FS.hasAlternativeForm()) { |
327 | switch (FS.getConversionSpecifier().getKind()) { |
328 | case ConversionSpecifier::Kind::aArg: |
329 | case ConversionSpecifier::Kind::AArg: |
330 | case ConversionSpecifier::Kind::eArg: |
331 | case ConversionSpecifier::Kind::EArg: |
332 | case ConversionSpecifier::Kind::fArg: |
333 | case ConversionSpecifier::Kind::FArg: |
334 | case ConversionSpecifier::Kind::gArg: |
335 | case ConversionSpecifier::Kind::GArg: |
336 | case ConversionSpecifier::Kind::xArg: |
337 | case ConversionSpecifier::Kind::XArg: |
338 | case ConversionSpecifier::Kind::oArg: |
339 | FormatSpec.push_back(c: '#'); |
340 | break; |
341 | default: |
342 | // Alternative forms don't exist for other argument kinds |
343 | break; |
344 | } |
345 | } |
346 | } |
347 | |
348 | void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS, |
349 | std::string &FormatSpec) { |
350 | { |
351 | const OptionalAmount FieldWidth = FS.getFieldWidth(); |
352 | switch (FieldWidth.getHowSpecified()) { |
353 | case OptionalAmount::NotSpecified: |
354 | break; |
355 | case OptionalAmount::Constant: |
356 | FormatSpec.append(str: llvm::utostr(X: FieldWidth.getConstantAmount())); |
357 | break; |
358 | case OptionalAmount::Arg: |
359 | FormatSpec.push_back(c: '{'); |
360 | if (FieldWidth.usesPositionalArg()) { |
361 | // std::format argument identifiers are zero-based, whereas printf |
362 | // ones are one based. |
363 | assert(FieldWidth.getPositionalArgIndex() > 0U); |
364 | FormatSpec.append(str: llvm::utostr(X: FieldWidth.getPositionalArgIndex() - 1)); |
365 | } |
366 | FormatSpec.push_back(c: '}'); |
367 | break; |
368 | case OptionalAmount::Invalid: |
369 | break; |
370 | } |
371 | } |
372 | } |
373 | |
374 | void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS, |
375 | std::string &FormatSpec) { |
376 | const OptionalAmount FieldPrecision = FS.getPrecision(); |
377 | switch (FieldPrecision.getHowSpecified()) { |
378 | case OptionalAmount::NotSpecified: |
379 | break; |
380 | case OptionalAmount::Constant: |
381 | FormatSpec.push_back(c: '.'); |
382 | FormatSpec.append(str: llvm::utostr(X: FieldPrecision.getConstantAmount())); |
383 | break; |
384 | case OptionalAmount::Arg: |
385 | FormatSpec.push_back(c: '.'); |
386 | FormatSpec.push_back(c: '{'); |
387 | if (FieldPrecision.usesPositionalArg()) { |
388 | // std::format argument identifiers are zero-based, whereas printf |
389 | // ones are one based. |
390 | assert(FieldPrecision.getPositionalArgIndex() > 0U); |
391 | FormatSpec.append( |
392 | str: llvm::utostr(X: FieldPrecision.getPositionalArgIndex() - 1)); |
393 | } |
394 | FormatSpec.push_back(c: '}'); |
395 | break; |
396 | case OptionalAmount::Invalid: |
397 | break; |
398 | } |
399 | } |
400 | |
401 | void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) { |
402 | unsigned ArgCount = 0; |
403 | const OptionalAmount FieldWidth = FS.getFieldWidth(); |
404 | const OptionalAmount FieldPrecision = FS.getPrecision(); |
405 | |
406 | if (FieldWidth.getHowSpecified() == OptionalAmount::Arg && |
407 | !FieldWidth.usesPositionalArg()) |
408 | ++ArgCount; |
409 | if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg && |
410 | !FieldPrecision.usesPositionalArg()) |
411 | ++ArgCount; |
412 | |
413 | if (ArgCount) |
414 | ArgRotates.emplace_back(args: FS.getArgIndex() + ArgsOffset, args&: ArgCount); |
415 | } |
416 | |
417 | void FormatStringConverter::emitStringArgument(unsigned ArgIndex, |
418 | const Expr *Arg) { |
419 | // If the argument is the result of a call to std::string::c_str() or |
420 | // data() with a return type of char then we can remove that call and |
421 | // pass the std::string directly. We don't want to do so if the return |
422 | // type is not a char pointer (though it's unlikely that such code would |
423 | // compile without warnings anyway.) See RedundantStringCStrCheck. |
424 | |
425 | if (!StringCStrCallExprMatcher) { |
426 | // Lazily create the matcher |
427 | const auto StringDecl = type(hasUnqualifiedDesugaredType(InnerMatcher: recordType( |
428 | hasDeclaration(InnerMatcher: cxxRecordDecl(hasName(Name: "::std::basic_string")))))); |
429 | const auto StringExpr = expr( |
430 | anyOf(hasType(InnerMatcher: StringDecl), hasType(InnerMatcher: qualType(pointsTo(InnerMatcher: StringDecl))))); |
431 | |
432 | StringCStrCallExprMatcher = |
433 | cxxMemberCallExpr( |
434 | on(InnerMatcher: StringExpr.bind(ID: "arg")), callee(InnerMatcher: memberExpr().bind(ID: "member")), |
435 | callee(InnerMatcher: cxxMethodDecl(hasAnyName("c_str", "data"), |
436 | returns(InnerMatcher: pointerType(pointee(isRealChar())))))) |
437 | .bind(ID: "call"); |
438 | } |
439 | |
440 | auto CStrMatches = match(Matcher: *StringCStrCallExprMatcher, Node: *Arg, Context&: *Context); |
441 | if (CStrMatches.size() == 1) |
442 | ArgCStrRemovals.push_back(x: CStrMatches.front()); |
443 | else if (Arg->getType()->isPointerType()) { |
444 | const QualType Pointee = Arg->getType()->getPointeeType(); |
445 | // printf is happy to print signed char and unsigned char strings, but |
446 | // std::format only likes char strings. |
447 | if (Pointee->isCharType() && !isRealCharType(Ty: Pointee)) |
448 | ArgFixes.emplace_back(args&: ArgIndex, args: "reinterpret_cast<const char *>("); |
449 | } |
450 | } |
451 | |
452 | bool FormatStringConverter::emitIntegerArgument( |
453 | ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex, |
454 | std::string &FormatSpec) { |
455 | const clang::QualType &ArgType = Arg->getType(); |
456 | if (ArgType->isBooleanType()) { |
457 | // std::format will print bool as either "true" or "false" by default, |
458 | // but printf prints them as "0" or "1". Be compatible with printf by |
459 | // requesting decimal output. |
460 | FormatSpec.push_back(c: 'd'); |
461 | } else if (ArgType->isEnumeralType()) { |
462 | // std::format will try to find a specialization to print the enum |
463 | // (and probably fail), whereas printf would have just expected it to |
464 | // be passed as its underlying type. However, printf will have forced |
465 | // the signedness based on the format string, so we need to do the |
466 | // same. |
467 | if (const auto *ET = ArgType->getAs<EnumType>()) { |
468 | if (const std::optional<std::string> MaybeCastType = |
469 | castTypeForArgument(ArgKind, QT: ET->getDecl()->getIntegerType())) |
470 | ArgFixes.emplace_back( |
471 | args&: ArgIndex, args: (Twine("static_cast<") + *MaybeCastType + ">(").str()); |
472 | else |
473 | return conversionNotPossible( |
474 | Reason: (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type") |
475 | .str()); |
476 | } |
477 | } else if (CastMismatchedIntegerTypes && |
478 | !isMatchingSignedness(ArgKind, ArgType)) { |
479 | // printf will happily print an unsigned type as signed if told to. |
480 | // Even -Wformat doesn't warn for this. std::format will format as |
481 | // unsigned unless we cast it. |
482 | if (const std::optional<std::string> MaybeCastType = |
483 | castTypeForArgument(ArgKind, QT: ArgType)) |
484 | ArgFixes.emplace_back( |
485 | args&: ArgIndex, args: (Twine("static_cast<") + *MaybeCastType + ">(").str()); |
486 | else |
487 | return conversionNotPossible( |
488 | Reason: (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to "+ |
489 | Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned" |
490 | : "signed") + |
491 | " integer type to match format" |
492 | " specifier and StrictMode is enabled") |
493 | .str()); |
494 | } else if (isRealCharType(Ty: ArgType) || !ArgType->isIntegerType()) { |
495 | // Only specify integer if the argument is of a different type |
496 | FormatSpec.push_back(c: 'd'); |
497 | } |
498 | return true; |
499 | } |
500 | |
501 | /// Append the corresponding standard format string type fragment to FormatSpec, |
502 | /// and store any argument fixes for later application. |
503 | /// @returns true on success, false on failure |
504 | bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg, |
505 | std::string &FormatSpec) { |
506 | ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); |
507 | switch (ArgKind) { |
508 | case ConversionSpecifier::Kind::sArg: |
509 | emitStringArgument(ArgIndex: FS.getArgIndex() + ArgsOffset, Arg); |
510 | break; |
511 | case ConversionSpecifier::Kind::cArg: |
512 | // The type must be "c" to get a character unless the type is exactly |
513 | // char (whether that be signed or unsigned for the target.) |
514 | if (!isRealCharType(Ty: Arg->getType())) |
515 | FormatSpec.push_back(c: 'c'); |
516 | break; |
517 | case ConversionSpecifier::Kind::dArg: |
518 | case ConversionSpecifier::Kind::iArg: |
519 | case ConversionSpecifier::Kind::uArg: |
520 | if (!emitIntegerArgument(ArgKind, Arg, ArgIndex: FS.getArgIndex() + ArgsOffset, |
521 | FormatSpec)) |
522 | return false; |
523 | break; |
524 | case ConversionSpecifier::Kind::pArg: { |
525 | const clang::QualType &ArgType = Arg->getType(); |
526 | // std::format knows how to format void pointers and nullptrs |
527 | if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType()) |
528 | ArgFixes.emplace_back(args: FS.getArgIndex() + ArgsOffset, |
529 | args: "static_cast<const void *>("); |
530 | break; |
531 | } |
532 | case ConversionSpecifier::Kind::xArg: |
533 | FormatSpec.push_back(c: 'x'); |
534 | break; |
535 | case ConversionSpecifier::Kind::XArg: |
536 | FormatSpec.push_back(c: 'X'); |
537 | break; |
538 | case ConversionSpecifier::Kind::oArg: |
539 | FormatSpec.push_back(c: 'o'); |
540 | break; |
541 | case ConversionSpecifier::Kind::aArg: |
542 | FormatSpec.push_back(c: 'a'); |
543 | break; |
544 | case ConversionSpecifier::Kind::AArg: |
545 | FormatSpec.push_back(c: 'A'); |
546 | break; |
547 | case ConversionSpecifier::Kind::eArg: |
548 | FormatSpec.push_back(c: 'e'); |
549 | break; |
550 | case ConversionSpecifier::Kind::EArg: |
551 | FormatSpec.push_back(c: 'E'); |
552 | break; |
553 | case ConversionSpecifier::Kind::fArg: |
554 | FormatSpec.push_back(c: 'f'); |
555 | break; |
556 | case ConversionSpecifier::Kind::FArg: |
557 | FormatSpec.push_back(c: 'F'); |
558 | break; |
559 | case ConversionSpecifier::Kind::gArg: |
560 | FormatSpec.push_back(c: 'g'); |
561 | break; |
562 | case ConversionSpecifier::Kind::GArg: |
563 | FormatSpec.push_back(c: 'G'); |
564 | break; |
565 | default: |
566 | // Something we don't understand |
567 | return conversionNotPossible(Reason: (Twine("argument ") + |
568 | Twine(FS.getArgIndex() + ArgsOffset) + |
569 | " has an unsupported format specifier") |
570 | .str()); |
571 | } |
572 | |
573 | return true; |
574 | } |
575 | |
576 | /// Append the standard format string equivalent of the passed PrintfSpecifier |
577 | /// to StandardFormatString and store any argument fixes for later application. |
578 | /// @returns true on success, false on failure |
579 | bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS, |
580 | const Expr *Arg, |
581 | std::string &StandardFormatString) { |
582 | // The specifier must have an associated argument |
583 | assert(FS.consumesDataArgument()); |
584 | |
585 | StandardFormatString.push_back(c: '{'); |
586 | |
587 | if (FS.usesPositionalArg()) { |
588 | // std::format argument identifiers are zero-based, whereas printf ones |
589 | // are one based. |
590 | assert(FS.getPositionalArgIndex() > 0U); |
591 | StandardFormatString.append(str: llvm::utostr(X: FS.getPositionalArgIndex() - 1)); |
592 | } |
593 | |
594 | // std::format format argument parts to potentially emit: |
595 | // [[fill]align][sign]["#"]["0"][width]["."precision][type] |
596 | std::string FormatSpec; |
597 | |
598 | // printf doesn't support specifying the fill character - it's always a |
599 | // space, so we never need to generate one. |
600 | |
601 | emitAlignment(FS, FormatSpec); |
602 | emitSign(FS, FormatSpec); |
603 | emitAlternativeForm(FS, FormatSpec); |
604 | |
605 | if (FS.hasLeadingZeros()) |
606 | FormatSpec.push_back(c: '0'); |
607 | |
608 | emitFieldWidth(FS, FormatSpec); |
609 | emitPrecision(FS, FormatSpec); |
610 | maybeRotateArguments(FS); |
611 | |
612 | if (!emitType(FS, Arg, FormatSpec)) |
613 | return false; |
614 | |
615 | if (!FormatSpec.empty()) { |
616 | StandardFormatString.push_back(c: ':'); |
617 | StandardFormatString.append(str: FormatSpec); |
618 | } |
619 | |
620 | StandardFormatString.push_back(c: '}'); |
621 | return true; |
622 | } |
623 | |
624 | /// Called for each format specifier by ParsePrintfString. |
625 | bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS, |
626 | const char *StartSpecifier, |
627 | unsigned SpecifierLen, |
628 | const TargetInfo &Target) { |
629 | |
630 | const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data(); |
631 | assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size()); |
632 | |
633 | // Everything before the specifier needs copying verbatim |
634 | assert(StartSpecifierPos >= PrintfFormatStringPos); |
635 | |
636 | appendFormatText(Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, |
637 | StartSpecifierPos - PrintfFormatStringPos)); |
638 | |
639 | const ConversionSpecifier::Kind ArgKind = |
640 | FS.getConversionSpecifier().getKind(); |
641 | |
642 | // Skip over specifier |
643 | PrintfFormatStringPos = StartSpecifierPos + SpecifierLen; |
644 | assert(PrintfFormatStringPos <= PrintfFormatString.size()); |
645 | |
646 | FormatStringNeededRewriting = true; |
647 | |
648 | if (ArgKind == ConversionSpecifier::Kind::nArg) { |
649 | // std::print doesn't do the equivalent of %n |
650 | return conversionNotPossible(Reason: "'%n' is not supported in format string"); |
651 | } |
652 | |
653 | if (ArgKind == ConversionSpecifier::Kind::PrintErrno) { |
654 | // std::print doesn't support %m. In theory we could insert a |
655 | // strerror(errno) parameter (assuming that libc has a thread-safe |
656 | // implementation, which glibc does), but that would require keeping track |
657 | // of the input and output parameter indices for position arguments too. |
658 | return conversionNotPossible(Reason: "'%m' is not supported in format string"); |
659 | } |
660 | |
661 | if (ArgKind == ConversionSpecifier::PercentArg) { |
662 | StandardFormatString.push_back(c: '%'); |
663 | return true; |
664 | } |
665 | |
666 | const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset; |
667 | if (ArgIndex >= NumArgs) { |
668 | // Argument index out of range. Give up. |
669 | return conversionNotPossible( |
670 | Reason: (Twine("argument index ") + Twine(ArgIndex) + " is out of range") |
671 | .str()); |
672 | } |
673 | |
674 | return convertArgument(FS, Arg: Args[ArgIndex]->IgnoreImplicitAsWritten(), |
675 | StandardFormatString); |
676 | } |
677 | |
678 | /// Called at the very end just before applying fixes to capture the last part |
679 | /// of the format string. |
680 | void FormatStringConverter::finalizeFormatText() { |
681 | appendFormatText( |
682 | Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, |
683 | PrintfFormatString.size() - PrintfFormatStringPos)); |
684 | PrintfFormatStringPos = PrintfFormatString.size(); |
685 | |
686 | // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n") |
687 | // than to std::println("Hello\r"); |
688 | // Use StringRef until C++20 std::string::ends_with() is available. |
689 | const auto StandardFormatStringRef = StringRef(StandardFormatString); |
690 | if (Config.AllowTrailingNewlineRemoval && |
691 | StandardFormatStringRef.ends_with(Suffix: "\\n") && |
692 | !StandardFormatStringRef.ends_with(Suffix: "\\\\n") && |
693 | !StandardFormatStringRef.ends_with(Suffix: "\\r\\n")) { |
694 | UsePrintNewlineFunction = true; |
695 | FormatStringNeededRewriting = true; |
696 | StandardFormatString.erase(first: StandardFormatString.end() - 2, |
697 | last: StandardFormatString.end()); |
698 | } |
699 | |
700 | StandardFormatString.push_back(c: '\"'); |
701 | } |
702 | |
703 | /// Append literal parts of the format text, reinstating escapes as required. |
704 | void FormatStringConverter::appendFormatText(const StringRef Text) { |
705 | for (const char Ch : Text) { |
706 | if (Ch == '\a') |
707 | StandardFormatString += "\\a"; |
708 | else if (Ch == '\b') |
709 | StandardFormatString += "\\b"; |
710 | else if (Ch == '\f') |
711 | StandardFormatString += "\\f"; |
712 | else if (Ch == '\n') |
713 | StandardFormatString += "\\n"; |
714 | else if (Ch == '\r') |
715 | StandardFormatString += "\\r"; |
716 | else if (Ch == '\t') |
717 | StandardFormatString += "\\t"; |
718 | else if (Ch == '\v') |
719 | StandardFormatString += "\\v"; |
720 | else if (Ch == '\"') |
721 | StandardFormatString += "\\\""; |
722 | else if (Ch == '\\') |
723 | StandardFormatString += "\\\\"; |
724 | else if (Ch == '{') { |
725 | StandardFormatString += "{{"; |
726 | FormatStringNeededRewriting = true; |
727 | } else if (Ch == '}') { |
728 | StandardFormatString += "}}"; |
729 | FormatStringNeededRewriting = true; |
730 | } else if (Ch < 32) { |
731 | StandardFormatString += "\\x"; |
732 | StandardFormatString += llvm::hexdigit(X: Ch >> 4, LowerCase: true); |
733 | StandardFormatString += llvm::hexdigit(X: Ch & 0xf, LowerCase: true); |
734 | } else |
735 | StandardFormatString += Ch; |
736 | } |
737 | } |
738 | |
739 | static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, |
740 | ASTContext &Context) { |
741 | const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>(ID: "arg"); |
742 | const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>(ID: "member"); |
743 | const bool Arrow = Member->isArrow(); |
744 | return Arrow ? utils::fixit::formatDereference(ExprNode: *Arg, Context) |
745 | : tooling::fixit::getText(Node: *Arg, Context).str(); |
746 | } |
747 | |
748 | /// Called by the check when it is ready to apply the fixes. |
749 | void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag, |
750 | SourceManager &SM) { |
751 | if (FormatStringNeededRewriting) { |
752 | Diag << FixItHint::CreateReplacement( |
753 | RemoveRange: CharSourceRange::getTokenRange(B: FormatExpr->getBeginLoc(), |
754 | E: FormatExpr->getEndLoc()), |
755 | Code: StandardFormatString); |
756 | } |
757 | |
758 | // ArgCount is one less than the number of arguments to be rotated. |
759 | for (auto [ValueArgIndex, ArgCount] : ArgRotates) { |
760 | assert(ValueArgIndex < NumArgs); |
761 | assert(ValueArgIndex > ArgCount); |
762 | |
763 | // First move the value argument to the right place. But if there's a |
764 | // pending c_str() removal then we must do that at the same time. |
765 | if (const auto CStrRemovalMatch = |
766 | std::find_if(first: ArgCStrRemovals.cbegin(), last: ArgCStrRemovals.cend(), |
767 | pred: [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()]( |
768 | const BoundNodes &Match) { |
769 | // This c_str() removal corresponds to the argument |
770 | // being moved if they start at the same location. |
771 | const Expr *CStrArg = Match.getNodeAs<Expr>(ID: "arg"); |
772 | return ArgStartPos == CStrArg->getBeginLoc(); |
773 | }); |
774 | CStrRemovalMatch != ArgCStrRemovals.end()) { |
775 | const std::string ArgText = |
776 | withoutCStrReplacement(CStrRemovalMatch: *CStrRemovalMatch, Context&: *Context); |
777 | assert(!ArgText.empty()); |
778 | |
779 | Diag << FixItHint::CreateReplacement( |
780 | Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText); |
781 | |
782 | // That c_str() removal is now dealt with, so we don't need to do it again |
783 | ArgCStrRemovals.erase(position: CStrRemovalMatch); |
784 | } else |
785 | Diag << tooling::fixit::createReplacement(Destination: *Args[ValueArgIndex - ArgCount], |
786 | Source: *Args[ValueArgIndex], Context: *Context); |
787 | |
788 | // Now shift down the field width and precision (if either are present) to |
789 | // accommodate it. |
790 | for (size_t Offset = 0; Offset < ArgCount; ++Offset) |
791 | Diag << tooling::fixit::createReplacement( |
792 | Destination: *Args[ValueArgIndex - Offset], Source: *Args[ValueArgIndex - Offset - 1], |
793 | Context: *Context); |
794 | |
795 | // Now we need to modify the ArgFix index too so that we fix the right |
796 | // argument. We don't need to care about the width and precision indices |
797 | // since they never need fixing. |
798 | for (auto &ArgFix : ArgFixes) { |
799 | if (ArgFix.ArgIndex == ValueArgIndex) |
800 | ArgFix.ArgIndex = ValueArgIndex - ArgCount; |
801 | } |
802 | } |
803 | |
804 | for (const auto &[ArgIndex, Replacement] : ArgFixes) { |
805 | SourceLocation AfterOtherSide = |
806 | Lexer::findNextToken(Loc: Args[ArgIndex]->getEndLoc(), SM, LangOpts) |
807 | ->getLocation(); |
808 | |
809 | Diag << FixItHint::CreateInsertion(InsertionLoc: Args[ArgIndex]->getBeginLoc(), |
810 | Code: Replacement, BeforePreviousInsertions: true) |
811 | << FixItHint::CreateInsertion(InsertionLoc: AfterOtherSide, Code: ")", BeforePreviousInsertions: true); |
812 | } |
813 | |
814 | for (const auto &Match : ArgCStrRemovals) { |
815 | const auto *Call = Match.getNodeAs<CallExpr>(ID: "call"); |
816 | const std::string ArgText = withoutCStrReplacement(CStrRemovalMatch: Match, Context&: *Context); |
817 | if (!ArgText.empty()) |
818 | Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText); |
819 | } |
820 | } |
821 | } // namespace clang::tidy::utils |
822 |
Definitions
- isRealCharType
- getCorrespondingSignedTypeName
- getCorrespondingUnsignedTypeName
- castTypeForArgument
- isMatchingSignedness
- isRealChar
- castMismatchedIntegerTypes
- FormatStringConverter
- formatStringContainsUnreplaceableMacro
- emitAlignment
- emitSign
- emitAlternativeForm
- emitFieldWidth
- emitPrecision
- maybeRotateArguments
- emitStringArgument
- emitIntegerArgument
- emitType
- convertArgument
- HandlePrintfSpecifier
- finalizeFormatText
- appendFormatText
- withoutCStrReplacement
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more