//===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "SuspiciousMissingCommaCheck.h"
10#include "clang/AST/ASTContext.h"
11#include "clang/ASTMatchers/ASTMatchFinder.h"
12
13using namespace clang::ast_matchers;
14
15namespace clang::tidy::bugprone {
16
17namespace {
18
19bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
20 const StringLiteral *Lit) {
21 // String literals surrounded by parentheses are assumed to be on purpose.
22 // i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] };
23
24 TraversalKindScope RAII(*Ctx, TK_AsIs);
25 auto Parents = Ctx->getParents(Node: *Lit);
26 if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr)
27 return true;
28
29 // Appropriately indented string literals are assumed to be on purpose.
30 // The following frequent indentation is accepted:
31 // const char* Array[] = {
32 // "first literal"
33 // "indented literal"
34 // "indented literal",
35 // "second literal",
36 // [...]
37 // };
38 const SourceManager &SM = Ctx->getSourceManager();
39 bool IndentedCorrectly = true;
40 SourceLocation FirstToken = Lit->getStrTokenLoc(TokNum: 0);
41 FileID BaseFID = SM.getFileID(SpellingLoc: FirstToken);
42 unsigned int BaseIndent = SM.getSpellingColumnNumber(Loc: FirstToken);
43 unsigned int BaseLine = SM.getSpellingLineNumber(Loc: FirstToken);
44 for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++TokNum) {
45 SourceLocation Token = Lit->getStrTokenLoc(TokNum);
46 FileID FID = SM.getFileID(SpellingLoc: Token);
47 unsigned int Indent = SM.getSpellingColumnNumber(Loc: Token);
48 unsigned int Line = SM.getSpellingLineNumber(Loc: Token);
49 if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) {
50 IndentedCorrectly = false;
51 break;
52 }
53 }
54 if (IndentedCorrectly)
55 return true;
56
57 // There is no pattern recognized by the checker, assume it's not on purpose.
58 return false;
59}
60
61AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned,
62 MaxConcatenatedTokens) {
63 return Node.getNumConcatenated() > 1 &&
64 Node.getNumConcatenated() < MaxConcatenatedTokens &&
65 !isConcatenatedLiteralsOnPurpose(Ctx: &Finder->getASTContext(), Lit: &Node);
66}
67
68} // namespace
69
70SuspiciousMissingCommaCheck::SuspiciousMissingCommaCheck(
71 StringRef Name, ClangTidyContext *Context)
72 : ClangTidyCheck(Name, Context),
73 SizeThreshold(Options.get(LocalName: "SizeThreshold", Default: 5U)),
74 RatioThreshold(std::stod(str: Options.get(LocalName: "RatioThreshold", Default: ".2").str())),
75 MaxConcatenatedTokens(Options.get(LocalName: "MaxConcatenatedTokens", Default: 5U)) {}
76
77void SuspiciousMissingCommaCheck::storeOptions(
78 ClangTidyOptions::OptionMap &Opts) {
79 Options.store(Options&: Opts, LocalName: "SizeThreshold", Value: SizeThreshold);
80 Options.store(Options&: Opts, LocalName: "RatioThreshold", Value: std::to_string(val: RatioThreshold));
81 Options.store(Options&: Opts, LocalName: "MaxConcatenatedTokens", Value: MaxConcatenatedTokens);
82}
83
84void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
85 const auto ConcatenatedStringLiteral =
86 stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind(ID: "str");
87
88 const auto StringsInitializerList =
89 initListExpr(hasType(InnerMatcher: constantArrayType()),
90 has(ignoringParenImpCasts(InnerMatcher: expr(ConcatenatedStringLiteral))));
91
92 Finder->addMatcher(NodeMatch: StringsInitializerList.bind(ID: "list"), Action: this);
93}
94
95void SuspiciousMissingCommaCheck::check(
96 const MatchFinder::MatchResult &Result) {
97 const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>(ID: "list");
98 const auto *ConcatenatedLiteral =
99 Result.Nodes.getNodeAs<StringLiteral>(ID: "str");
100 assert(InitializerList && ConcatenatedLiteral);
101
102 // Skip small arrays as they often generate false-positive.
103 unsigned int Size = InitializerList->getNumInits();
104 if (Size < SizeThreshold)
105 return;
106
107 // Count the number of occurrence of concatenated string literal.
108 unsigned int Count = 0;
109 for (unsigned int I = 0; I < Size; ++I) {
110 const Expr *Child = InitializerList->getInit(Init: I)->IgnoreImpCasts();
111 if (const auto *Literal = dyn_cast<StringLiteral>(Val: Child)) {
112 if (Literal->getNumConcatenated() > 1)
113 ++Count;
114 }
115 }
116
117 // Warn only when concatenation is not common in this initializer list.
118 // The current threshold is set to less than 1/5 of the string literals.
119 if (double(Count) / Size > RatioThreshold)
120 return;
121
122 diag(Loc: ConcatenatedLiteral->getBeginLoc(),
123 Description: "suspicious string literal, probably missing a comma");
124}
125
126} // namespace clang::tidy::bugprone
127

// Source: clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.cpp