1 | //===--- StringIntegerAssignmentCheck.cpp - clang-tidy---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "StringIntegerAssignmentCheck.h" |
10 | #include "clang/AST/ASTContext.h" |
11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
12 | #include "clang/Lex/Lexer.h" |
13 | |
14 | using namespace clang::ast_matchers; |
15 | |
16 | namespace clang::tidy::bugprone { |
17 | |
18 | void StringIntegerAssignmentCheck::registerMatchers(MatchFinder *Finder) { |
19 | Finder->addMatcher( |
20 | NodeMatch: cxxOperatorCallExpr( |
21 | hasAnyOverloadedOperatorName("=" , "+=" ), |
22 | callee(InnerMatcher: cxxMethodDecl(ofClass(InnerMatcher: classTemplateSpecializationDecl( |
23 | hasName(Name: "::std::basic_string" ), |
24 | hasTemplateArgument(N: 0, InnerMatcher: refersToType(InnerMatcher: hasCanonicalType( |
25 | InnerMatcher: qualType().bind(ID: "type" )))))))), |
26 | hasArgument( |
27 | N: 1, |
28 | InnerMatcher: ignoringImpCasts( |
29 | InnerMatcher: expr(hasType(InnerMatcher: isInteger()), unless(hasType(InnerMatcher: isAnyCharacter())), |
30 | // Ignore calls to tolower/toupper (see PR27723). |
31 | unless(callExpr(callee(InnerMatcher: functionDecl( |
32 | hasAnyName("tolower" , "std::tolower" , "toupper" , |
33 | "std::toupper" ))))), |
34 | // Do not warn if assigning e.g. `CodePoint` to |
35 | // `basic_string<CodePoint>` |
36 | unless(hasType(InnerMatcher: qualType( |
37 | hasCanonicalType(InnerMatcher: equalsBoundNode(ID: "type" )))))) |
38 | .bind(ID: "expr" ))), |
39 | unless(isInTemplateInstantiation())), |
40 | Action: this); |
41 | } |
42 | |
43 | class CharExpressionDetector { |
44 | public: |
45 | CharExpressionDetector(QualType CharType, const ASTContext &Ctx) |
46 | : CharType(CharType), Ctx(Ctx) {} |
47 | |
48 | bool isLikelyCharExpression(const Expr *E) const { |
49 | if (isCharTyped(E)) |
50 | return true; |
51 | |
52 | if (const auto *BinOp = dyn_cast<BinaryOperator>(Val: E)) { |
53 | const auto *LHS = BinOp->getLHS()->IgnoreParenImpCasts(); |
54 | const auto *RHS = BinOp->getRHS()->IgnoreParenImpCasts(); |
55 | // Handle both directions, e.g. `'a' + (i % 26)` and `(i % 26) + 'a'`. |
56 | if (BinOp->isAdditiveOp() || BinOp->isBitwiseOp()) |
57 | return handleBinaryOp(Opcode: BinOp->getOpcode(), LHS, RHS) || |
58 | handleBinaryOp(Opcode: BinOp->getOpcode(), LHS: RHS, RHS: LHS); |
59 | // Except in the case of '%'. |
60 | if (BinOp->getOpcode() == BO_Rem) |
61 | return handleBinaryOp(Opcode: BinOp->getOpcode(), LHS, RHS); |
62 | return false; |
63 | } |
64 | |
65 | // Ternary where at least one branch is a likely char expression, e.g. |
66 | // i < 265 ? i : ' ' |
67 | if (const auto *CondOp = dyn_cast<AbstractConditionalOperator>(Val: E)) |
68 | return isLikelyCharExpression( |
69 | E: CondOp->getFalseExpr()->IgnoreParenImpCasts()) || |
70 | isLikelyCharExpression( |
71 | E: CondOp->getTrueExpr()->IgnoreParenImpCasts()); |
72 | return false; |
73 | } |
74 | |
75 | private: |
76 | bool handleBinaryOp(clang::BinaryOperatorKind Opcode, const Expr *const LHS, |
77 | const Expr *const RHS) const { |
78 | // <char_expr> <op> <char_expr> (c++ integer promotion rules make this an |
79 | // int), e.g. |
80 | // 'a' + c |
81 | if (isCharTyped(E: LHS) && isCharTyped(E: RHS)) |
82 | return true; |
83 | |
84 | // <expr> & <char_valued_constant> or <expr> % <char_valued_constant>, e.g. |
85 | // i & 0xff |
86 | if ((Opcode == BO_And || Opcode == BO_Rem) && isCharValuedConstant(E: RHS)) |
87 | return true; |
88 | |
89 | // <char_expr> | <char_valued_constant>, e.g. |
90 | // c | 0x80 |
91 | if (Opcode == BO_Or && isCharTyped(E: LHS) && isCharValuedConstant(E: RHS)) |
92 | return true; |
93 | |
94 | // <char_constant> + <likely_char_expr>, e.g. |
95 | // 'a' + (i % 26) |
96 | if (Opcode == BO_Add) |
97 | return isCharConstant(E: LHS) && isLikelyCharExpression(E: RHS); |
98 | |
99 | return false; |
100 | } |
101 | |
102 | // Returns true if `E` is an character constant. |
103 | bool isCharConstant(const Expr *E) const { |
104 | return isCharTyped(E) && isCharValuedConstant(E); |
105 | }; |
106 | |
107 | // Returns true if `E` is an integer constant which fits in `CharType`. |
108 | bool isCharValuedConstant(const Expr *E) const { |
109 | if (E->isInstantiationDependent()) |
110 | return false; |
111 | Expr::EvalResult EvalResult; |
112 | if (!E->EvaluateAsInt(Result&: EvalResult, Ctx, AllowSideEffects: Expr::SE_AllowSideEffects)) |
113 | return false; |
114 | return EvalResult.Val.getInt().getActiveBits() <= Ctx.getTypeSize(CharType); |
115 | }; |
116 | |
117 | // Returns true if `E` has the right character type. |
118 | bool isCharTyped(const Expr *E) const { |
119 | return E->getType().getCanonicalType().getTypePtr() == |
120 | CharType.getTypePtr(); |
121 | }; |
122 | |
123 | const QualType CharType; |
124 | const ASTContext &Ctx; |
125 | }; |
126 | |
127 | void StringIntegerAssignmentCheck::check( |
128 | const MatchFinder::MatchResult &Result) { |
129 | const auto *Argument = Result.Nodes.getNodeAs<Expr>(ID: "expr" ); |
130 | const auto CharType = |
131 | Result.Nodes.getNodeAs<QualType>(ID: "type" )->getCanonicalType(); |
132 | SourceLocation Loc = Argument->getBeginLoc(); |
133 | |
134 | // Try to detect a few common expressions to reduce false positives. |
135 | if (CharExpressionDetector(CharType, *Result.Context) |
136 | .isLikelyCharExpression(E: Argument)) |
137 | return; |
138 | |
139 | auto Diag = |
140 | diag(Loc, Description: "an integer is interpreted as a character code when assigning " |
141 | "it to a string; if this is intended, cast the integer to the " |
142 | "appropriate character type; if you want a string " |
143 | "representation, use the appropriate conversion facility" ); |
144 | |
145 | if (Loc.isMacroID()) |
146 | return; |
147 | |
148 | bool IsWideCharType = CharType->isWideCharType(); |
149 | if (!CharType->isCharType() && !IsWideCharType) |
150 | return; |
151 | bool IsOneDigit = false; |
152 | bool IsLiteral = false; |
153 | if (const auto *Literal = dyn_cast<IntegerLiteral>(Val: Argument)) { |
154 | IsOneDigit = Literal->getValue().getLimitedValue() < 10; |
155 | IsLiteral = true; |
156 | } |
157 | |
158 | SourceLocation EndLoc = Lexer::getLocForEndOfToken( |
159 | Loc: Argument->getEndLoc(), Offset: 0, SM: *Result.SourceManager, LangOpts: getLangOpts()); |
160 | if (IsOneDigit) { |
161 | Diag << FixItHint::CreateInsertion(InsertionLoc: Loc, Code: IsWideCharType ? "L'" : "'" ) |
162 | << FixItHint::CreateInsertion(InsertionLoc: EndLoc, Code: "'" ); |
163 | return; |
164 | } |
165 | if (IsLiteral) { |
166 | Diag << FixItHint::CreateInsertion(InsertionLoc: Loc, Code: IsWideCharType ? "L\"" : "\"" ) |
167 | << FixItHint::CreateInsertion(InsertionLoc: EndLoc, Code: "\"" ); |
168 | return; |
169 | } |
170 | |
171 | if (getLangOpts().CPlusPlus11) { |
172 | Diag << FixItHint::CreateInsertion(InsertionLoc: Loc, Code: IsWideCharType ? "std::to_wstring(" |
173 | : "std::to_string(" ) |
174 | << FixItHint::CreateInsertion(InsertionLoc: EndLoc, Code: ")" ); |
175 | } |
176 | } |
177 | |
178 | } // namespace clang::tidy::bugprone |
179 | |