1 | //===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "SuspiciousMissingCommaCheck.h" |
10 | #include "clang/AST/ASTContext.h" |
11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
12 | |
13 | using namespace clang::ast_matchers; |
14 | |
15 | namespace clang::tidy::bugprone { |
16 | |
17 | namespace { |
18 | |
19 | bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx, |
20 | const StringLiteral *Lit) { |
21 | // String literals surrounded by parentheses are assumed to be on purpose. |
22 | // i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] }; |
23 | |
24 | TraversalKindScope RAII(*Ctx, TK_AsIs); |
25 | auto Parents = Ctx->getParents(Node: *Lit); |
26 | if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr) |
27 | return true; |
28 | |
29 | // Appropriately indented string literals are assumed to be on purpose. |
30 | // The following frequent indentation is accepted: |
31 | // const char* Array[] = { |
32 | // "first literal" |
33 | // "indented literal" |
34 | // "indented literal", |
35 | // "second literal", |
36 | // [...] |
37 | // }; |
38 | const SourceManager &SM = Ctx->getSourceManager(); |
39 | bool IndentedCorrectly = true; |
40 | SourceLocation FirstToken = Lit->getStrTokenLoc(TokNum: 0); |
41 | FileID BaseFID = SM.getFileID(SpellingLoc: FirstToken); |
42 | unsigned int BaseIndent = SM.getSpellingColumnNumber(Loc: FirstToken); |
43 | unsigned int BaseLine = SM.getSpellingLineNumber(Loc: FirstToken); |
44 | for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++TokNum) { |
45 | SourceLocation Token = Lit->getStrTokenLoc(TokNum); |
46 | FileID FID = SM.getFileID(SpellingLoc: Token); |
47 | unsigned int Indent = SM.getSpellingColumnNumber(Loc: Token); |
48 | unsigned int Line = SM.getSpellingLineNumber(Loc: Token); |
49 | if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) { |
50 | IndentedCorrectly = false; |
51 | break; |
52 | } |
53 | } |
54 | if (IndentedCorrectly) |
55 | return true; |
56 | |
57 | // There is no pattern recognized by the checker, assume it's not on purpose. |
58 | return false; |
59 | } |
60 | |
61 | AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned, |
62 | MaxConcatenatedTokens) { |
63 | return Node.getNumConcatenated() > 1 && |
64 | Node.getNumConcatenated() < MaxConcatenatedTokens && |
65 | !isConcatenatedLiteralsOnPurpose(Ctx: &Finder->getASTContext(), Lit: &Node); |
66 | } |
67 | |
68 | } // namespace |
69 | |
70 | SuspiciousMissingCommaCheck::SuspiciousMissingCommaCheck( |
71 | StringRef Name, ClangTidyContext *Context) |
72 | : ClangTidyCheck(Name, Context), |
73 | SizeThreshold(Options.get(LocalName: "SizeThreshold" , Default: 5U)), |
74 | RatioThreshold(std::stod(str: Options.get(LocalName: "RatioThreshold" , Default: ".2" ).str())), |
75 | MaxConcatenatedTokens(Options.get(LocalName: "MaxConcatenatedTokens" , Default: 5U)) {} |
76 | |
77 | void SuspiciousMissingCommaCheck::storeOptions( |
78 | ClangTidyOptions::OptionMap &Opts) { |
79 | Options.store(Options&: Opts, LocalName: "SizeThreshold" , Value: SizeThreshold); |
80 | Options.store(Options&: Opts, LocalName: "RatioThreshold" , Value: std::to_string(val: RatioThreshold)); |
81 | Options.store(Options&: Opts, LocalName: "MaxConcatenatedTokens" , Value: MaxConcatenatedTokens); |
82 | } |
83 | |
84 | void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) { |
85 | const auto ConcatenatedStringLiteral = |
86 | stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind(ID: "str" ); |
87 | |
88 | const auto StringsInitializerList = |
89 | initListExpr(hasType(InnerMatcher: constantArrayType()), |
90 | has(ignoringParenImpCasts(InnerMatcher: expr(ConcatenatedStringLiteral)))); |
91 | |
92 | Finder->addMatcher(NodeMatch: StringsInitializerList.bind(ID: "list" ), Action: this); |
93 | } |
94 | |
95 | void SuspiciousMissingCommaCheck::check( |
96 | const MatchFinder::MatchResult &Result) { |
97 | const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>(ID: "list" ); |
98 | const auto *ConcatenatedLiteral = |
99 | Result.Nodes.getNodeAs<StringLiteral>(ID: "str" ); |
100 | assert(InitializerList && ConcatenatedLiteral); |
101 | |
102 | // Skip small arrays as they often generate false-positive. |
103 | unsigned int Size = InitializerList->getNumInits(); |
104 | if (Size < SizeThreshold) |
105 | return; |
106 | |
107 | // Count the number of occurrence of concatenated string literal. |
108 | unsigned int Count = 0; |
109 | for (unsigned int I = 0; I < Size; ++I) { |
110 | const Expr *Child = InitializerList->getInit(Init: I)->IgnoreImpCasts(); |
111 | if (const auto *Literal = dyn_cast<StringLiteral>(Val: Child)) { |
112 | if (Literal->getNumConcatenated() > 1) |
113 | ++Count; |
114 | } |
115 | } |
116 | |
117 | // Warn only when concatenation is not common in this initializer list. |
118 | // The current threshold is set to less than 1/5 of the string literals. |
119 | if (double(Count) / Size > RatioThreshold) |
120 | return; |
121 | |
122 | diag(Loc: ConcatenatedLiteral->getBeginLoc(), |
123 | Description: "suspicious string literal, probably missing a comma" ); |
124 | } |
125 | |
126 | } // namespace clang::tidy::bugprone |
127 | |