| 1 | //===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "SuspiciousMissingCommaCheck.h" |
| 10 | #include "clang/AST/ASTContext.h" |
| 11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 12 | |
| 13 | using namespace clang::ast_matchers; |
| 14 | |
| 15 | namespace clang::tidy::bugprone { |
| 16 | |
| 17 | namespace { |
| 18 | |
| 19 | bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx, |
| 20 | const StringLiteral *Lit) { |
| 21 | // String literals surrounded by parentheses are assumed to be on purpose. |
| 22 | // i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] }; |
| 23 | |
| 24 | TraversalKindScope RAII(*Ctx, TK_AsIs); |
| 25 | auto Parents = Ctx->getParents(Node: *Lit); |
| 26 | if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr) |
| 27 | return true; |
| 28 | |
| 29 | // Appropriately indented string literals are assumed to be on purpose. |
| 30 | // The following frequent indentation is accepted: |
| 31 | // const char* Array[] = { |
| 32 | // "first literal" |
| 33 | // "indented literal" |
| 34 | // "indented literal", |
| 35 | // "second literal", |
| 36 | // [...] |
| 37 | // }; |
| 38 | const SourceManager &SM = Ctx->getSourceManager(); |
| 39 | bool IndentedCorrectly = true; |
| 40 | SourceLocation FirstToken = Lit->getStrTokenLoc(TokNum: 0); |
| 41 | FileID BaseFID = SM.getFileID(SpellingLoc: FirstToken); |
| 42 | unsigned int BaseIndent = SM.getSpellingColumnNumber(Loc: FirstToken); |
| 43 | unsigned int BaseLine = SM.getSpellingLineNumber(Loc: FirstToken); |
| 44 | for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++TokNum) { |
| 45 | SourceLocation Token = Lit->getStrTokenLoc(TokNum); |
| 46 | FileID FID = SM.getFileID(SpellingLoc: Token); |
| 47 | unsigned int Indent = SM.getSpellingColumnNumber(Loc: Token); |
| 48 | unsigned int Line = SM.getSpellingLineNumber(Loc: Token); |
| 49 | if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) { |
| 50 | IndentedCorrectly = false; |
| 51 | break; |
| 52 | } |
| 53 | } |
| 54 | if (IndentedCorrectly) |
| 55 | return true; |
| 56 | |
| 57 | // There is no pattern recognized by the checker, assume it's not on purpose. |
| 58 | return false; |
| 59 | } |
| 60 | |
| 61 | AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned, |
| 62 | MaxConcatenatedTokens) { |
| 63 | return Node.getNumConcatenated() > 1 && |
| 64 | Node.getNumConcatenated() < MaxConcatenatedTokens && |
| 65 | !isConcatenatedLiteralsOnPurpose(Ctx: &Finder->getASTContext(), Lit: &Node); |
| 66 | } |
| 67 | |
| 68 | } // namespace |
| 69 | |
| 70 | SuspiciousMissingCommaCheck::SuspiciousMissingCommaCheck( |
| 71 | StringRef Name, ClangTidyContext *Context) |
| 72 | : ClangTidyCheck(Name, Context), |
| 73 | SizeThreshold(Options.get(LocalName: "SizeThreshold" , Default: 5U)), |
| 74 | RatioThreshold(std::stod(str: Options.get(LocalName: "RatioThreshold" , Default: ".2" ).str())), |
| 75 | MaxConcatenatedTokens(Options.get(LocalName: "MaxConcatenatedTokens" , Default: 5U)) {} |
| 76 | |
| 77 | void SuspiciousMissingCommaCheck::storeOptions( |
| 78 | ClangTidyOptions::OptionMap &Opts) { |
| 79 | Options.store(Options&: Opts, LocalName: "SizeThreshold" , Value: SizeThreshold); |
| 80 | Options.store(Options&: Opts, LocalName: "RatioThreshold" , Value: std::to_string(val: RatioThreshold)); |
| 81 | Options.store(Options&: Opts, LocalName: "MaxConcatenatedTokens" , Value: MaxConcatenatedTokens); |
| 82 | } |
| 83 | |
| 84 | void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) { |
| 85 | const auto ConcatenatedStringLiteral = |
| 86 | stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind(ID: "str" ); |
| 87 | |
| 88 | const auto StringsInitializerList = |
| 89 | initListExpr(hasType(InnerMatcher: constantArrayType()), |
| 90 | has(ignoringParenImpCasts(InnerMatcher: expr(ConcatenatedStringLiteral)))); |
| 91 | |
| 92 | Finder->addMatcher(NodeMatch: StringsInitializerList.bind(ID: "list" ), Action: this); |
| 93 | } |
| 94 | |
| 95 | void SuspiciousMissingCommaCheck::check( |
| 96 | const MatchFinder::MatchResult &Result) { |
| 97 | const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>(ID: "list" ); |
| 98 | const auto *ConcatenatedLiteral = |
| 99 | Result.Nodes.getNodeAs<StringLiteral>(ID: "str" ); |
| 100 | assert(InitializerList && ConcatenatedLiteral); |
| 101 | |
| 102 | // Skip small arrays as they often generate false-positive. |
| 103 | unsigned int Size = InitializerList->getNumInits(); |
| 104 | if (Size < SizeThreshold) |
| 105 | return; |
| 106 | |
| 107 | // Count the number of occurrence of concatenated string literal. |
| 108 | unsigned int Count = 0; |
| 109 | for (unsigned int I = 0; I < Size; ++I) { |
| 110 | const Expr *Child = InitializerList->getInit(Init: I)->IgnoreImpCasts(); |
| 111 | if (const auto *Literal = dyn_cast<StringLiteral>(Val: Child)) { |
| 112 | if (Literal->getNumConcatenated() > 1) |
| 113 | ++Count; |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | // Warn only when concatenation is not common in this initializer list. |
| 118 | // The current threshold is set to less than 1/5 of the string literals. |
| 119 | if (double(Count) / Size > RatioThreshold) |
| 120 | return; |
| 121 | |
| 122 | diag(Loc: ConcatenatedLiteral->getBeginLoc(), |
| 123 | Description: "suspicious string literal, probably missing a comma" ); |
| 124 | } |
| 125 | |
| 126 | } // namespace clang::tidy::bugprone |
| 127 | |