| 1 | //===--- FormatStringConverter.cpp - clang-tidy----------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// Implementation of the FormatStringConverter class which is used to convert |
| 11 | /// printf format strings to C++ std::formatter format strings. |
| 12 | /// |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "FormatStringConverter.h" |
| 16 | #include "../utils/FixItHintUtils.h" |
| 17 | #include "clang/AST/Expr.h" |
| 18 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 19 | #include "clang/Basic/LangOptions.h" |
| 20 | #include "clang/Lex/Lexer.h" |
| 21 | #include "clang/Lex/Preprocessor.h" |
| 22 | #include "clang/Tooling/FixIt.h" |
| 23 | #include "llvm/ADT/StringExtras.h" |
| 24 | #include "llvm/Support/Debug.h" |
| 25 | |
| 26 | using namespace clang::ast_matchers; |
| 27 | using namespace clang::analyze_printf; |
| 28 | |
| 29 | namespace clang::tidy::utils { |
| 30 | using clang::analyze_format_string::ConversionSpecifier; |
| 31 | |
| 32 | /// Is the passed type the actual "char" type, whether that be signed or |
| 33 | /// unsigned, rather than explicit signed char or unsigned char types. |
| 34 | static bool isRealCharType(const clang::QualType &Ty) { |
| 35 | using namespace clang; |
| 36 | const Type *DesugaredType = Ty->getUnqualifiedDesugaredType(); |
| 37 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: DesugaredType)) |
| 38 | return (BT->getKind() == BuiltinType::Char_U || |
| 39 | BT->getKind() == BuiltinType::Char_S); |
| 40 | return false; |
| 41 | } |
| 42 | |
| 43 | /// If possible, return the text name of the signed type that corresponds to the |
| 44 | /// passed integer type. If the passed type is already signed then its name is |
| 45 | /// just returned. Only supports BuiltinTypes. |
| 46 | static std::optional<std::string> |
| 47 | getCorrespondingSignedTypeName(const clang::QualType &QT) { |
| 48 | using namespace clang; |
| 49 | const auto UQT = QT.getUnqualifiedType(); |
| 50 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) { |
| 51 | switch (BT->getKind()) { |
| 52 | case BuiltinType::UChar: |
| 53 | case BuiltinType::Char_U: |
| 54 | case BuiltinType::SChar: |
| 55 | case BuiltinType::Char_S: |
| 56 | return "signed char" ; |
| 57 | case BuiltinType::UShort: |
| 58 | case BuiltinType::Short: |
| 59 | return "short" ; |
| 60 | case BuiltinType::UInt: |
| 61 | case BuiltinType::Int: |
| 62 | return "int" ; |
| 63 | case BuiltinType::ULong: |
| 64 | case BuiltinType::Long: |
| 65 | return "long" ; |
| 66 | case BuiltinType::ULongLong: |
| 67 | case BuiltinType::LongLong: |
| 68 | return "long long" ; |
| 69 | default: |
| 70 | llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '" |
| 71 | << QT.getAsString() << "'\n" ; |
| 72 | return std::nullopt; |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only |
| 77 | // if the argument type does. |
| 78 | const std::string TypeName = UQT.getAsString(); |
| 79 | StringRef SimplifiedTypeName{TypeName}; |
| 80 | const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::" ); |
| 81 | const StringRef Prefix = InStd ? "std::" : "" ; |
| 82 | |
| 83 | if (SimplifiedTypeName.starts_with(Prefix: "uint" ) && |
| 84 | SimplifiedTypeName.ends_with(Suffix: "_t" )) |
| 85 | return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str(); |
| 86 | |
| 87 | if (SimplifiedTypeName == "size_t" ) |
| 88 | return (Twine(Prefix) + "ssize_t" ).str(); |
| 89 | |
| 90 | llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '" |
| 91 | << UQT.getAsString() << "'\n" ; |
| 92 | return std::nullopt; |
| 93 | } |
| 94 | |
| 95 | /// If possible, return the text name of the unsigned type that corresponds to |
| 96 | /// the passed integer type. If the passed type is already unsigned then its |
| 97 | /// name is just returned. Only supports BuiltinTypes. |
| 98 | static std::optional<std::string> |
| 99 | getCorrespondingUnsignedTypeName(const clang::QualType &QT) { |
| 100 | using namespace clang; |
| 101 | const auto UQT = QT.getUnqualifiedType(); |
| 102 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) { |
| 103 | switch (BT->getKind()) { |
| 104 | case BuiltinType::SChar: |
| 105 | case BuiltinType::Char_S: |
| 106 | case BuiltinType::UChar: |
| 107 | case BuiltinType::Char_U: |
| 108 | return "unsigned char" ; |
| 109 | case BuiltinType::Short: |
| 110 | case BuiltinType::UShort: |
| 111 | return "unsigned short" ; |
| 112 | case BuiltinType::Int: |
| 113 | case BuiltinType::UInt: |
| 114 | return "unsigned int" ; |
| 115 | case BuiltinType::Long: |
| 116 | case BuiltinType::ULong: |
| 117 | return "unsigned long" ; |
| 118 | case BuiltinType::LongLong: |
| 119 | case BuiltinType::ULongLong: |
| 120 | return "unsigned long long" ; |
| 121 | default: |
| 122 | llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '" |
| 123 | << UQT.getAsString() << "'\n" ; |
| 124 | return std::nullopt; |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only |
| 129 | // if the argument type does. |
| 130 | const std::string TypeName = UQT.getAsString(); |
| 131 | StringRef SimplifiedTypeName{TypeName}; |
| 132 | const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::" ); |
| 133 | const StringRef Prefix = InStd ? "std::" : "" ; |
| 134 | |
| 135 | if (SimplifiedTypeName.starts_with(Prefix: "int" ) && |
| 136 | SimplifiedTypeName.ends_with(Suffix: "_t" )) |
| 137 | return (Twine(Prefix) + "u" + SimplifiedTypeName).str(); |
| 138 | |
| 139 | if (SimplifiedTypeName == "ssize_t" ) |
| 140 | return (Twine(Prefix) + "size_t" ).str(); |
| 141 | if (SimplifiedTypeName == "ptrdiff_t" ) |
| 142 | return (Twine(Prefix) + "size_t" ).str(); |
| 143 | |
| 144 | llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '" |
| 145 | << UQT.getAsString() << "'\n" ; |
| 146 | return std::nullopt; |
| 147 | } |
| 148 | |
| 149 | static std::optional<std::string> |
| 150 | castTypeForArgument(ConversionSpecifier::Kind ArgKind, |
| 151 | const clang::QualType &QT) { |
| 152 | if (ArgKind == ConversionSpecifier::Kind::uArg) |
| 153 | return getCorrespondingUnsignedTypeName(QT); |
| 154 | return getCorrespondingSignedTypeName(QT); |
| 155 | } |
| 156 | |
| 157 | static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, |
| 158 | const clang::QualType &ArgType) { |
| 159 | if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: ArgType)) { |
| 160 | // Unadorned char never matches any expected signedness since it |
| 161 | // could be signed or unsigned. |
| 162 | const auto ArgTypeKind = BT->getKind(); |
| 163 | if (ArgTypeKind == BuiltinType::Char_U || |
| 164 | ArgTypeKind == BuiltinType::Char_S) |
| 165 | return false; |
| 166 | } |
| 167 | |
| 168 | if (ArgKind == ConversionSpecifier::Kind::uArg) |
| 169 | return ArgType->isUnsignedIntegerType(); |
| 170 | return ArgType->isSignedIntegerType(); |
| 171 | } |
| 172 | |
| 173 | namespace { |
| 174 | AST_MATCHER(clang::QualType, isRealChar) { |
| 175 | return clang::tidy::utils::isRealCharType(Ty: Node); |
| 176 | } |
| 177 | } // namespace |
| 178 | |
| 179 | static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) { |
| 180 | /// For printf-style functions, the signedness of the type printed is |
| 181 | /// indicated by the corresponding type in the format string. |
| 182 | /// std::print will determine the signedness from the type of the |
| 183 | /// argument. This means that it is necessary to generate a cast in |
| 184 | /// StrictMode to ensure that the exact behaviour is maintained. |
| 185 | /// However, for templated functions like absl::PrintF and |
| 186 | /// fmt::printf, the signedness of the type printed is also taken from |
| 187 | /// the actual argument like std::print, so such casts are never |
| 188 | /// necessary. printf-style functions are variadic, whereas templated |
| 189 | /// ones aren't, so we can use that to distinguish between the two |
| 190 | /// cases. |
| 191 | if (StrictMode) { |
| 192 | const FunctionDecl *FuncDecl = Call->getDirectCallee(); |
| 193 | assert(FuncDecl); |
| 194 | return FuncDecl->isVariadic(); |
| 195 | } |
| 196 | return false; |
| 197 | } |
| 198 | |
| 199 | FormatStringConverter::FormatStringConverter( |
| 200 | ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset, |
| 201 | const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM, |
| 202 | Preprocessor &PP) |
| 203 | : Context(ContextIn), Config(ConfigIn), |
| 204 | CastMismatchedIntegerTypes( |
| 205 | castMismatchedIntegerTypes(Call, StrictMode: ConfigIn.StrictMode)), |
| 206 | Args(Call->getArgs()), NumArgs(Call->getNumArgs()), |
| 207 | ArgsOffset(FormatArgOffset + 1), LangOpts(LO) { |
| 208 | assert(ArgsOffset <= NumArgs); |
| 209 | FormatExpr = llvm::dyn_cast<StringLiteral>( |
| 210 | Val: Args[FormatArgOffset]->IgnoreImplicitAsWritten()); |
| 211 | |
| 212 | if (!FormatExpr || !FormatExpr->isOrdinary()) { |
| 213 | // Function must have a narrow string literal as its first argument. |
| 214 | conversionNotPossible(Reason: "first argument is not a narrow string literal" ); |
| 215 | return; |
| 216 | } |
| 217 | |
| 218 | if (const std::optional<StringRef> MaybeMacroName = |
| 219 | formatStringContainsUnreplaceableMacro(CallExpr: Call, FormatExpr, SM, PP); |
| 220 | MaybeMacroName) { |
| 221 | conversionNotPossible( |
| 222 | Reason: ("format string contains unreplaceable macro '" + *MaybeMacroName + "'" ) |
| 223 | .str()); |
| 224 | return; |
| 225 | } |
| 226 | |
| 227 | PrintfFormatString = FormatExpr->getString(); |
| 228 | |
| 229 | // Assume that the output will be approximately the same size as the input, |
| 230 | // but perhaps with a few escapes expanded. |
| 231 | const size_t EstimatedGrowth = 8; |
| 232 | StandardFormatString.reserve(res: PrintfFormatString.size() + EstimatedGrowth); |
| 233 | StandardFormatString.push_back(c: '\"'); |
| 234 | |
| 235 | const bool IsFreeBsdkPrintf = false; |
| 236 | |
| 237 | using clang::analyze_format_string::ParsePrintfString; |
| 238 | ParsePrintfString(H&: *this, beg: PrintfFormatString.data(), |
| 239 | end: PrintfFormatString.data() + PrintfFormatString.size(), |
| 240 | LO: LangOpts, Target: Context->getTargetInfo(), isFreeBSDKPrintf: IsFreeBsdkPrintf); |
| 241 | finalizeFormatText(); |
| 242 | } |
| 243 | |
| 244 | std::optional<StringRef> |
| 245 | FormatStringConverter::formatStringContainsUnreplaceableMacro( |
| 246 | const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM, |
| 247 | Preprocessor &PP) { |
| 248 | // If a macro invocation surrounds the entire call then we don't want that to |
| 249 | // inhibit conversion. The whole format string will appear to come from that |
| 250 | // macro, as will the function call. |
| 251 | std::optional<StringRef> MaybeSurroundingMacroName; |
| 252 | if (SourceLocation BeginCallLoc = Call->getBeginLoc(); |
| 253 | BeginCallLoc.isMacroID()) |
| 254 | MaybeSurroundingMacroName = |
| 255 | Lexer::getImmediateMacroName(Loc: BeginCallLoc, SM, LangOpts: PP.getLangOpts()); |
| 256 | |
| 257 | for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end(); |
| 258 | I != E; ++I) { |
| 259 | const SourceLocation &TokenLoc = *I; |
| 260 | if (TokenLoc.isMacroID()) { |
| 261 | const StringRef MacroName = |
| 262 | Lexer::getImmediateMacroName(Loc: TokenLoc, SM, LangOpts: PP.getLangOpts()); |
| 263 | |
| 264 | if (MaybeSurroundingMacroName != MacroName) { |
| 265 | // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes |
| 266 | // for types that change size so we must look for multiple prefixes. |
| 267 | if (!MacroName.starts_with(Prefix: "PRI" ) && !MacroName.starts_with(Prefix: "__PRI" )) |
| 268 | return MacroName; |
| 269 | |
| 270 | const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(Loc: TokenLoc); |
| 271 | const OptionalFileEntryRef MaybeFileEntry = |
| 272 | SM.getFileEntryRefForID(FID: SM.getFileID(SpellingLoc: TokenSpellingLoc)); |
| 273 | if (!MaybeFileEntry) |
| 274 | return MacroName; |
| 275 | |
| 276 | HeaderSearch &HS = PP.getHeaderSearchInfo(); |
| 277 | // Check if the file is a system header |
| 278 | if (!isSystem(CK: HS.getFileDirFlavor(File: *MaybeFileEntry)) || |
| 279 | llvm::sys::path::filename(path: MaybeFileEntry->getName()) != |
| 280 | "inttypes.h" ) |
| 281 | return MacroName; |
| 282 | } |
| 283 | } |
| 284 | } |
| 285 | return std::nullopt; |
| 286 | } |
| 287 | |
| 288 | void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS, |
| 289 | std::string &FormatSpec) { |
| 290 | ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); |
| 291 | |
| 292 | // We only care about alignment if a field width is specified |
| 293 | if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) { |
| 294 | if (ArgKind == ConversionSpecifier::sArg) { |
| 295 | // Strings are left-aligned by default with std::format, so we only |
| 296 | // need to emit an alignment if this one needs to be right aligned. |
| 297 | if (!FS.isLeftJustified()) |
| 298 | FormatSpec.push_back(c: '>'); |
| 299 | } else { |
| 300 | // Numbers are right-aligned by default with std::format, so we only |
| 301 | // need to emit an alignment if this one needs to be left aligned. |
| 302 | if (FS.isLeftJustified()) |
| 303 | FormatSpec.push_back(c: '<'); |
| 304 | } |
| 305 | } |
| 306 | } |
| 307 | |
| 308 | void FormatStringConverter::emitSign(const PrintfSpecifier &FS, |
| 309 | std::string &FormatSpec) { |
| 310 | const ConversionSpecifier Spec = FS.getConversionSpecifier(); |
| 311 | |
| 312 | // Ignore on something that isn't numeric. For printf it's would be a |
| 313 | // compile-time warning but ignored at runtime, but for std::format it |
| 314 | // ought to be a compile-time error. |
| 315 | if (Spec.isAnyIntArg() || Spec.isDoubleArg()) { |
| 316 | // + is preferred to ' ' |
| 317 | if (FS.hasPlusPrefix()) |
| 318 | FormatSpec.push_back(c: '+'); |
| 319 | else if (FS.hasSpacePrefix()) |
| 320 | FormatSpec.push_back(c: ' '); |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS, |
| 325 | std::string &FormatSpec) { |
| 326 | if (FS.hasAlternativeForm()) { |
| 327 | switch (FS.getConversionSpecifier().getKind()) { |
| 328 | case ConversionSpecifier::Kind::aArg: |
| 329 | case ConversionSpecifier::Kind::AArg: |
| 330 | case ConversionSpecifier::Kind::eArg: |
| 331 | case ConversionSpecifier::Kind::EArg: |
| 332 | case ConversionSpecifier::Kind::fArg: |
| 333 | case ConversionSpecifier::Kind::FArg: |
| 334 | case ConversionSpecifier::Kind::gArg: |
| 335 | case ConversionSpecifier::Kind::GArg: |
| 336 | case ConversionSpecifier::Kind::xArg: |
| 337 | case ConversionSpecifier::Kind::XArg: |
| 338 | case ConversionSpecifier::Kind::oArg: |
| 339 | FormatSpec.push_back(c: '#'); |
| 340 | break; |
| 341 | default: |
| 342 | // Alternative forms don't exist for other argument kinds |
| 343 | break; |
| 344 | } |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS, |
| 349 | std::string &FormatSpec) { |
| 350 | { |
| 351 | const OptionalAmount FieldWidth = FS.getFieldWidth(); |
| 352 | switch (FieldWidth.getHowSpecified()) { |
| 353 | case OptionalAmount::NotSpecified: |
| 354 | break; |
| 355 | case OptionalAmount::Constant: |
| 356 | FormatSpec.append(str: llvm::utostr(X: FieldWidth.getConstantAmount())); |
| 357 | break; |
| 358 | case OptionalAmount::Arg: |
| 359 | FormatSpec.push_back(c: '{'); |
| 360 | if (FieldWidth.usesPositionalArg()) { |
| 361 | // std::format argument identifiers are zero-based, whereas printf |
| 362 | // ones are one based. |
| 363 | assert(FieldWidth.getPositionalArgIndex() > 0U); |
| 364 | FormatSpec.append(str: llvm::utostr(X: FieldWidth.getPositionalArgIndex() - 1)); |
| 365 | } |
| 366 | FormatSpec.push_back(c: '}'); |
| 367 | break; |
| 368 | case OptionalAmount::Invalid: |
| 369 | break; |
| 370 | } |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS, |
| 375 | std::string &FormatSpec) { |
| 376 | const OptionalAmount FieldPrecision = FS.getPrecision(); |
| 377 | switch (FieldPrecision.getHowSpecified()) { |
| 378 | case OptionalAmount::NotSpecified: |
| 379 | break; |
| 380 | case OptionalAmount::Constant: |
| 381 | FormatSpec.push_back(c: '.'); |
| 382 | FormatSpec.append(str: llvm::utostr(X: FieldPrecision.getConstantAmount())); |
| 383 | break; |
| 384 | case OptionalAmount::Arg: |
| 385 | FormatSpec.push_back(c: '.'); |
| 386 | FormatSpec.push_back(c: '{'); |
| 387 | if (FieldPrecision.usesPositionalArg()) { |
| 388 | // std::format argument identifiers are zero-based, whereas printf |
| 389 | // ones are one based. |
| 390 | assert(FieldPrecision.getPositionalArgIndex() > 0U); |
| 391 | FormatSpec.append( |
| 392 | str: llvm::utostr(X: FieldPrecision.getPositionalArgIndex() - 1)); |
| 393 | } |
| 394 | FormatSpec.push_back(c: '}'); |
| 395 | break; |
| 396 | case OptionalAmount::Invalid: |
| 397 | break; |
| 398 | } |
| 399 | } |
| 400 | |
| 401 | void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) { |
| 402 | unsigned ArgCount = 0; |
| 403 | const OptionalAmount FieldWidth = FS.getFieldWidth(); |
| 404 | const OptionalAmount FieldPrecision = FS.getPrecision(); |
| 405 | |
| 406 | if (FieldWidth.getHowSpecified() == OptionalAmount::Arg && |
| 407 | !FieldWidth.usesPositionalArg()) |
| 408 | ++ArgCount; |
| 409 | if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg && |
| 410 | !FieldPrecision.usesPositionalArg()) |
| 411 | ++ArgCount; |
| 412 | |
| 413 | if (ArgCount) |
| 414 | ArgRotates.emplace_back(args: FS.getArgIndex() + ArgsOffset, args&: ArgCount); |
| 415 | } |
| 416 | |
| 417 | void FormatStringConverter::emitStringArgument(unsigned ArgIndex, |
| 418 | const Expr *Arg) { |
| 419 | // If the argument is the result of a call to std::string::c_str() or |
| 420 | // data() with a return type of char then we can remove that call and |
| 421 | // pass the std::string directly. We don't want to do so if the return |
| 422 | // type is not a char pointer (though it's unlikely that such code would |
| 423 | // compile without warnings anyway.) See RedundantStringCStrCheck. |
| 424 | |
| 425 | if (!StringCStrCallExprMatcher) { |
| 426 | // Lazily create the matcher |
| 427 | const auto StringDecl = type(hasUnqualifiedDesugaredType(InnerMatcher: recordType( |
| 428 | hasDeclaration(InnerMatcher: cxxRecordDecl(hasName(Name: "::std::basic_string" )))))); |
| 429 | const auto StringExpr = expr( |
| 430 | anyOf(hasType(InnerMatcher: StringDecl), hasType(InnerMatcher: qualType(pointsTo(InnerMatcher: StringDecl))))); |
| 431 | |
| 432 | StringCStrCallExprMatcher = |
| 433 | cxxMemberCallExpr( |
| 434 | on(InnerMatcher: StringExpr.bind(ID: "arg" )), callee(InnerMatcher: memberExpr().bind(ID: "member" )), |
| 435 | callee(InnerMatcher: cxxMethodDecl(hasAnyName("c_str" , "data" ), |
| 436 | returns(InnerMatcher: pointerType(pointee(isRealChar())))))) |
| 437 | .bind(ID: "call" ); |
| 438 | } |
| 439 | |
| 440 | auto CStrMatches = match(Matcher: *StringCStrCallExprMatcher, Node: *Arg, Context&: *Context); |
| 441 | if (CStrMatches.size() == 1) |
| 442 | ArgCStrRemovals.push_back(x: CStrMatches.front()); |
| 443 | else if (Arg->getType()->isPointerType()) { |
| 444 | const QualType Pointee = Arg->getType()->getPointeeType(); |
| 445 | // printf is happy to print signed char and unsigned char strings, but |
| 446 | // std::format only likes char strings. |
| 447 | if (Pointee->isCharType() && !isRealCharType(Ty: Pointee)) |
| 448 | ArgFixes.emplace_back(args&: ArgIndex, args: "reinterpret_cast<const char *>(" ); |
| 449 | } |
| 450 | } |
| 451 | |
| 452 | bool FormatStringConverter::emitIntegerArgument( |
| 453 | ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex, |
| 454 | std::string &FormatSpec) { |
| 455 | const clang::QualType &ArgType = Arg->getType(); |
| 456 | if (ArgType->isBooleanType()) { |
| 457 | // std::format will print bool as either "true" or "false" by default, |
| 458 | // but printf prints them as "0" or "1". Be compatible with printf by |
| 459 | // requesting decimal output. |
| 460 | FormatSpec.push_back(c: 'd'); |
| 461 | } else if (ArgType->isEnumeralType()) { |
| 462 | // std::format will try to find a specialization to print the enum |
| 463 | // (and probably fail), whereas printf would have just expected it to |
| 464 | // be passed as its underlying type. However, printf will have forced |
| 465 | // the signedness based on the format string, so we need to do the |
| 466 | // same. |
| 467 | if (const auto *ET = ArgType->getAs<EnumType>()) { |
| 468 | if (const std::optional<std::string> MaybeCastType = |
| 469 | castTypeForArgument(ArgKind, QT: ET->getDecl()->getIntegerType())) |
| 470 | ArgFixes.emplace_back( |
| 471 | args&: ArgIndex, args: (Twine("static_cast<" ) + *MaybeCastType + ">(" ).str()); |
| 472 | else |
| 473 | return conversionNotPossible( |
| 474 | Reason: (Twine("argument " ) + Twine(ArgIndex) + " has unexpected enum type" ) |
| 475 | .str()); |
| 476 | } |
| 477 | } else if (CastMismatchedIntegerTypes && |
| 478 | !isMatchingSignedness(ArgKind, ArgType)) { |
| 479 | // printf will happily print an unsigned type as signed if told to. |
| 480 | // Even -Wformat doesn't warn for this. std::format will format as |
| 481 | // unsigned unless we cast it. |
| 482 | if (const std::optional<std::string> MaybeCastType = |
| 483 | castTypeForArgument(ArgKind, QT: ArgType)) |
| 484 | ArgFixes.emplace_back( |
| 485 | args&: ArgIndex, args: (Twine("static_cast<" ) + *MaybeCastType + ">(" ).str()); |
| 486 | else |
| 487 | return conversionNotPossible( |
| 488 | Reason: (Twine("argument " ) + Twine(ArgIndex) + " cannot be cast to " + |
| 489 | Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned" |
| 490 | : "signed" ) + |
| 491 | " integer type to match format" |
| 492 | " specifier and StrictMode is enabled" ) |
| 493 | .str()); |
| 494 | } else if (isRealCharType(Ty: ArgType) || !ArgType->isIntegerType()) { |
| 495 | // Only specify integer if the argument is of a different type |
| 496 | FormatSpec.push_back(c: 'd'); |
| 497 | } |
| 498 | return true; |
| 499 | } |
| 500 | |
| 501 | /// Append the corresponding standard format string type fragment to FormatSpec, |
| 502 | /// and store any argument fixes for later application. |
| 503 | /// @returns true on success, false on failure |
| 504 | bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg, |
| 505 | std::string &FormatSpec) { |
| 506 | ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); |
| 507 | switch (ArgKind) { |
| 508 | case ConversionSpecifier::Kind::sArg: |
| 509 | emitStringArgument(ArgIndex: FS.getArgIndex() + ArgsOffset, Arg); |
| 510 | break; |
| 511 | case ConversionSpecifier::Kind::cArg: |
| 512 | // The type must be "c" to get a character unless the type is exactly |
| 513 | // char (whether that be signed or unsigned for the target.) |
| 514 | if (!isRealCharType(Ty: Arg->getType())) |
| 515 | FormatSpec.push_back(c: 'c'); |
| 516 | break; |
| 517 | case ConversionSpecifier::Kind::dArg: |
| 518 | case ConversionSpecifier::Kind::iArg: |
| 519 | case ConversionSpecifier::Kind::uArg: |
| 520 | if (!emitIntegerArgument(ArgKind, Arg, ArgIndex: FS.getArgIndex() + ArgsOffset, |
| 521 | FormatSpec)) |
| 522 | return false; |
| 523 | break; |
| 524 | case ConversionSpecifier::Kind::pArg: { |
| 525 | const clang::QualType &ArgType = Arg->getType(); |
| 526 | // std::format knows how to format void pointers and nullptrs |
| 527 | if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType()) |
| 528 | ArgFixes.emplace_back(args: FS.getArgIndex() + ArgsOffset, |
| 529 | args: "static_cast<const void *>(" ); |
| 530 | break; |
| 531 | } |
| 532 | case ConversionSpecifier::Kind::xArg: |
| 533 | FormatSpec.push_back(c: 'x'); |
| 534 | break; |
| 535 | case ConversionSpecifier::Kind::XArg: |
| 536 | FormatSpec.push_back(c: 'X'); |
| 537 | break; |
| 538 | case ConversionSpecifier::Kind::oArg: |
| 539 | FormatSpec.push_back(c: 'o'); |
| 540 | break; |
| 541 | case ConversionSpecifier::Kind::aArg: |
| 542 | FormatSpec.push_back(c: 'a'); |
| 543 | break; |
| 544 | case ConversionSpecifier::Kind::AArg: |
| 545 | FormatSpec.push_back(c: 'A'); |
| 546 | break; |
| 547 | case ConversionSpecifier::Kind::eArg: |
| 548 | FormatSpec.push_back(c: 'e'); |
| 549 | break; |
| 550 | case ConversionSpecifier::Kind::EArg: |
| 551 | FormatSpec.push_back(c: 'E'); |
| 552 | break; |
| 553 | case ConversionSpecifier::Kind::fArg: |
| 554 | FormatSpec.push_back(c: 'f'); |
| 555 | break; |
| 556 | case ConversionSpecifier::Kind::FArg: |
| 557 | FormatSpec.push_back(c: 'F'); |
| 558 | break; |
| 559 | case ConversionSpecifier::Kind::gArg: |
| 560 | FormatSpec.push_back(c: 'g'); |
| 561 | break; |
| 562 | case ConversionSpecifier::Kind::GArg: |
| 563 | FormatSpec.push_back(c: 'G'); |
| 564 | break; |
| 565 | default: |
| 566 | // Something we don't understand |
| 567 | return conversionNotPossible(Reason: (Twine("argument " ) + |
| 568 | Twine(FS.getArgIndex() + ArgsOffset) + |
| 569 | " has an unsupported format specifier" ) |
| 570 | .str()); |
| 571 | } |
| 572 | |
| 573 | return true; |
| 574 | } |
| 575 | |
| 576 | /// Append the standard format string equivalent of the passed PrintfSpecifier |
| 577 | /// to StandardFormatString and store any argument fixes for later application. |
| 578 | /// @returns true on success, false on failure |
| 579 | bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS, |
| 580 | const Expr *Arg, |
| 581 | std::string &StandardFormatString) { |
| 582 | // The specifier must have an associated argument |
| 583 | assert(FS.consumesDataArgument()); |
| 584 | |
| 585 | StandardFormatString.push_back(c: '{'); |
| 586 | |
| 587 | if (FS.usesPositionalArg()) { |
| 588 | // std::format argument identifiers are zero-based, whereas printf ones |
| 589 | // are one based. |
| 590 | assert(FS.getPositionalArgIndex() > 0U); |
| 591 | StandardFormatString.append(str: llvm::utostr(X: FS.getPositionalArgIndex() - 1)); |
| 592 | } |
| 593 | |
| 594 | // std::format format argument parts to potentially emit: |
| 595 | // [[fill]align][sign]["#"]["0"][width]["."precision][type] |
| 596 | std::string FormatSpec; |
| 597 | |
| 598 | // printf doesn't support specifying the fill character - it's always a |
| 599 | // space, so we never need to generate one. |
| 600 | |
| 601 | emitAlignment(FS, FormatSpec); |
| 602 | emitSign(FS, FormatSpec); |
| 603 | emitAlternativeForm(FS, FormatSpec); |
| 604 | |
| 605 | if (FS.hasLeadingZeros()) |
| 606 | FormatSpec.push_back(c: '0'); |
| 607 | |
| 608 | emitFieldWidth(FS, FormatSpec); |
| 609 | emitPrecision(FS, FormatSpec); |
| 610 | maybeRotateArguments(FS); |
| 611 | |
| 612 | if (!emitType(FS, Arg, FormatSpec)) |
| 613 | return false; |
| 614 | |
| 615 | if (!FormatSpec.empty()) { |
| 616 | StandardFormatString.push_back(c: ':'); |
| 617 | StandardFormatString.append(str: FormatSpec); |
| 618 | } |
| 619 | |
| 620 | StandardFormatString.push_back(c: '}'); |
| 621 | return true; |
| 622 | } |
| 623 | |
| 624 | /// Called for each format specifier by ParsePrintfString. |
| 625 | bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS, |
| 626 | const char *StartSpecifier, |
| 627 | unsigned SpecifierLen, |
| 628 | const TargetInfo &Target) { |
| 629 | |
| 630 | const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data(); |
| 631 | assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size()); |
| 632 | |
| 633 | // Everything before the specifier needs copying verbatim |
| 634 | assert(StartSpecifierPos >= PrintfFormatStringPos); |
| 635 | |
| 636 | appendFormatText(Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, |
| 637 | StartSpecifierPos - PrintfFormatStringPos)); |
| 638 | |
| 639 | const ConversionSpecifier::Kind ArgKind = |
| 640 | FS.getConversionSpecifier().getKind(); |
| 641 | |
| 642 | // Skip over specifier |
| 643 | PrintfFormatStringPos = StartSpecifierPos + SpecifierLen; |
| 644 | assert(PrintfFormatStringPos <= PrintfFormatString.size()); |
| 645 | |
| 646 | FormatStringNeededRewriting = true; |
| 647 | |
| 648 | if (ArgKind == ConversionSpecifier::Kind::nArg) { |
| 649 | // std::print doesn't do the equivalent of %n |
| 650 | return conversionNotPossible(Reason: "'%n' is not supported in format string" ); |
| 651 | } |
| 652 | |
| 653 | if (ArgKind == ConversionSpecifier::Kind::PrintErrno) { |
| 654 | // std::print doesn't support %m. In theory we could insert a |
| 655 | // strerror(errno) parameter (assuming that libc has a thread-safe |
| 656 | // implementation, which glibc does), but that would require keeping track |
| 657 | // of the input and output parameter indices for position arguments too. |
| 658 | return conversionNotPossible(Reason: "'%m' is not supported in format string" ); |
| 659 | } |
| 660 | |
| 661 | if (ArgKind == ConversionSpecifier::PercentArg) { |
| 662 | StandardFormatString.push_back(c: '%'); |
| 663 | return true; |
| 664 | } |
| 665 | |
| 666 | const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset; |
| 667 | if (ArgIndex >= NumArgs) { |
| 668 | // Argument index out of range. Give up. |
| 669 | return conversionNotPossible( |
| 670 | Reason: (Twine("argument index " ) + Twine(ArgIndex) + " is out of range" ) |
| 671 | .str()); |
| 672 | } |
| 673 | |
| 674 | return convertArgument(FS, Arg: Args[ArgIndex]->IgnoreImplicitAsWritten(), |
| 675 | StandardFormatString); |
| 676 | } |
| 677 | |
| 678 | /// Called at the very end just before applying fixes to capture the last part |
| 679 | /// of the format string. |
| 680 | void FormatStringConverter::finalizeFormatText() { |
| 681 | appendFormatText( |
| 682 | Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, |
| 683 | PrintfFormatString.size() - PrintfFormatStringPos)); |
| 684 | PrintfFormatStringPos = PrintfFormatString.size(); |
| 685 | |
| 686 | // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n") |
| 687 | // than to std::println("Hello\r"); |
| 688 | // Use StringRef until C++20 std::string::ends_with() is available. |
| 689 | const auto StandardFormatStringRef = StringRef(StandardFormatString); |
| 690 | if (Config.AllowTrailingNewlineRemoval && |
| 691 | StandardFormatStringRef.ends_with(Suffix: "\\n" ) && |
| 692 | !StandardFormatStringRef.ends_with(Suffix: "\\\\n" ) && |
| 693 | !StandardFormatStringRef.ends_with(Suffix: "\\r\\n" )) { |
| 694 | UsePrintNewlineFunction = true; |
| 695 | FormatStringNeededRewriting = true; |
| 696 | StandardFormatString.erase(first: StandardFormatString.end() - 2, |
| 697 | last: StandardFormatString.end()); |
| 698 | } |
| 699 | |
| 700 | StandardFormatString.push_back(c: '\"'); |
| 701 | } |
| 702 | |
| 703 | /// Append literal parts of the format text, reinstating escapes as required. |
| 704 | void FormatStringConverter::appendFormatText(const StringRef Text) { |
| 705 | for (const char Ch : Text) { |
| 706 | if (Ch == '\a') |
| 707 | StandardFormatString += "\\a" ; |
| 708 | else if (Ch == '\b') |
| 709 | StandardFormatString += "\\b" ; |
| 710 | else if (Ch == '\f') |
| 711 | StandardFormatString += "\\f" ; |
| 712 | else if (Ch == '\n') |
| 713 | StandardFormatString += "\\n" ; |
| 714 | else if (Ch == '\r') |
| 715 | StandardFormatString += "\\r" ; |
| 716 | else if (Ch == '\t') |
| 717 | StandardFormatString += "\\t" ; |
| 718 | else if (Ch == '\v') |
| 719 | StandardFormatString += "\\v" ; |
| 720 | else if (Ch == '\"') |
| 721 | StandardFormatString += "\\\"" ; |
| 722 | else if (Ch == '\\') |
| 723 | StandardFormatString += "\\\\" ; |
| 724 | else if (Ch == '{') { |
| 725 | StandardFormatString += "{{" ; |
| 726 | FormatStringNeededRewriting = true; |
| 727 | } else if (Ch == '}') { |
| 728 | StandardFormatString += "}}" ; |
| 729 | FormatStringNeededRewriting = true; |
| 730 | } else if (Ch < 32) { |
| 731 | StandardFormatString += "\\x" ; |
| 732 | StandardFormatString += llvm::hexdigit(X: Ch >> 4, LowerCase: true); |
| 733 | StandardFormatString += llvm::hexdigit(X: Ch & 0xf, LowerCase: true); |
| 734 | } else |
| 735 | StandardFormatString += Ch; |
| 736 | } |
| 737 | } |
| 738 | |
| 739 | static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, |
| 740 | ASTContext &Context) { |
| 741 | const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>(ID: "arg" ); |
| 742 | const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>(ID: "member" ); |
| 743 | const bool Arrow = Member->isArrow(); |
| 744 | return Arrow ? utils::fixit::formatDereference(ExprNode: *Arg, Context) |
| 745 | : tooling::fixit::getText(Node: *Arg, Context).str(); |
| 746 | } |
| 747 | |
| 748 | /// Called by the check when it is ready to apply the fixes. |
| 749 | void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag, |
| 750 | SourceManager &SM) { |
| 751 | if (FormatStringNeededRewriting) { |
| 752 | Diag << FixItHint::CreateReplacement( |
| 753 | RemoveRange: CharSourceRange::getTokenRange(B: FormatExpr->getBeginLoc(), |
| 754 | E: FormatExpr->getEndLoc()), |
| 755 | Code: StandardFormatString); |
| 756 | } |
| 757 | |
| 758 | // ArgCount is one less than the number of arguments to be rotated. |
| 759 | for (auto [ValueArgIndex, ArgCount] : ArgRotates) { |
| 760 | assert(ValueArgIndex < NumArgs); |
| 761 | assert(ValueArgIndex > ArgCount); |
| 762 | |
| 763 | // First move the value argument to the right place. But if there's a |
| 764 | // pending c_str() removal then we must do that at the same time. |
| 765 | if (const auto CStrRemovalMatch = |
| 766 | std::find_if(first: ArgCStrRemovals.cbegin(), last: ArgCStrRemovals.cend(), |
| 767 | pred: [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()]( |
| 768 | const BoundNodes &Match) { |
| 769 | // This c_str() removal corresponds to the argument |
| 770 | // being moved if they start at the same location. |
| 771 | const Expr *CStrArg = Match.getNodeAs<Expr>(ID: "arg" ); |
| 772 | return ArgStartPos == CStrArg->getBeginLoc(); |
| 773 | }); |
| 774 | CStrRemovalMatch != ArgCStrRemovals.end()) { |
| 775 | const std::string ArgText = |
| 776 | withoutCStrReplacement(CStrRemovalMatch: *CStrRemovalMatch, Context&: *Context); |
| 777 | assert(!ArgText.empty()); |
| 778 | |
| 779 | Diag << FixItHint::CreateReplacement( |
| 780 | Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText); |
| 781 | |
| 782 | // That c_str() removal is now dealt with, so we don't need to do it again |
| 783 | ArgCStrRemovals.erase(position: CStrRemovalMatch); |
| 784 | } else |
| 785 | Diag << tooling::fixit::createReplacement(Destination: *Args[ValueArgIndex - ArgCount], |
| 786 | Source: *Args[ValueArgIndex], Context: *Context); |
| 787 | |
| 788 | // Now shift down the field width and precision (if either are present) to |
| 789 | // accommodate it. |
| 790 | for (size_t Offset = 0; Offset < ArgCount; ++Offset) |
| 791 | Diag << tooling::fixit::createReplacement( |
| 792 | Destination: *Args[ValueArgIndex - Offset], Source: *Args[ValueArgIndex - Offset - 1], |
| 793 | Context: *Context); |
| 794 | |
| 795 | // Now we need to modify the ArgFix index too so that we fix the right |
| 796 | // argument. We don't need to care about the width and precision indices |
| 797 | // since they never need fixing. |
| 798 | for (auto &ArgFix : ArgFixes) { |
| 799 | if (ArgFix.ArgIndex == ValueArgIndex) |
| 800 | ArgFix.ArgIndex = ValueArgIndex - ArgCount; |
| 801 | } |
| 802 | } |
| 803 | |
| 804 | for (const auto &[ArgIndex, Replacement] : ArgFixes) { |
| 805 | SourceLocation AfterOtherSide = |
| 806 | Lexer::findNextToken(Loc: Args[ArgIndex]->getEndLoc(), SM, LangOpts) |
| 807 | ->getLocation(); |
| 808 | |
| 809 | Diag << FixItHint::CreateInsertion(InsertionLoc: Args[ArgIndex]->getBeginLoc(), |
| 810 | Code: Replacement, BeforePreviousInsertions: true) |
| 811 | << FixItHint::CreateInsertion(InsertionLoc: AfterOtherSide, Code: ")" , BeforePreviousInsertions: true); |
| 812 | } |
| 813 | |
| 814 | for (const auto &Match : ArgCStrRemovals) { |
| 815 | const auto *Call = Match.getNodeAs<CallExpr>(ID: "call" ); |
| 816 | const std::string ArgText = withoutCStrReplacement(CStrRemovalMatch: Match, Context&: *Context); |
| 817 | if (!ArgText.empty()) |
| 818 | Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText); |
| 819 | } |
| 820 | } |
| 821 | } // namespace clang::tidy::utils |
| 822 | |