1 | //=== LLVMConventionsChecker.cpp - Check LLVM codebase conventions ---*- C++ -*- |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This defines LLVMConventionsChecker, a collection of small checks that
// enforce specific coding conventions in the LLVM/Clang codebase.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
15 | #include "clang/AST/DeclTemplate.h" |
16 | #include "clang/AST/StmtVisitor.h" |
17 | #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" |
18 | #include "clang/StaticAnalyzer/Core/Checker.h" |
19 | #include "llvm/ADT/SmallString.h" |
20 | #include "llvm/Support/raw_ostream.h" |
21 | |
22 | using namespace clang; |
23 | using namespace ento; |
24 | |
25 | //===----------------------------------------------------------------------===// |
26 | // Generic type checking routines. |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | static bool IsLLVMStringRef(QualType T) { |
30 | const RecordType *RT = T->getAs<RecordType>(); |
31 | if (!RT) |
32 | return false; |
33 | |
34 | return StringRef(QualType(RT, 0).getAsString()) == "class StringRef" ; |
35 | } |
36 | |
37 | /// Check whether the declaration is semantically inside the top-level |
38 | /// namespace named by ns. |
39 | static bool InNamespace(const Decl *D, StringRef NS) { |
40 | const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(Val: D->getDeclContext()); |
41 | if (!ND) |
42 | return false; |
43 | const IdentifierInfo *II = ND->getIdentifier(); |
44 | if (!II || !II->getName().equals(RHS: NS)) |
45 | return false; |
46 | return isa<TranslationUnitDecl>(ND->getDeclContext()); |
47 | } |
48 | |
49 | static bool IsStdString(QualType T) { |
50 | if (const ElaboratedType *QT = T->getAs<ElaboratedType>()) |
51 | T = QT->getNamedType(); |
52 | |
53 | const TypedefType *TT = T->getAs<TypedefType>(); |
54 | if (!TT) |
55 | return false; |
56 | |
57 | const TypedefNameDecl *TD = TT->getDecl(); |
58 | |
59 | if (!TD->isInStdNamespace()) |
60 | return false; |
61 | |
62 | return TD->getName() == "string" ; |
63 | } |
64 | |
65 | static bool IsClangType(const RecordDecl *RD) { |
66 | return RD->getName() == "Type" && InNamespace(RD, "clang" ); |
67 | } |
68 | |
69 | static bool IsClangDecl(const RecordDecl *RD) { |
70 | return RD->getName() == "Decl" && InNamespace(RD, "clang" ); |
71 | } |
72 | |
73 | static bool IsClangStmt(const RecordDecl *RD) { |
74 | return RD->getName() == "Stmt" && InNamespace(RD, "clang" ); |
75 | } |
76 | |
77 | static bool IsClangAttr(const RecordDecl *RD) { |
78 | return RD->getName() == "Attr" && InNamespace(RD, "clang" ); |
79 | } |
80 | |
81 | static bool IsStdVector(QualType T) { |
82 | const TemplateSpecializationType *TS = T->getAs<TemplateSpecializationType>(); |
83 | if (!TS) |
84 | return false; |
85 | |
86 | TemplateName TM = TS->getTemplateName(); |
87 | TemplateDecl *TD = TM.getAsTemplateDecl(); |
88 | |
89 | if (!TD || !InNamespace(TD, "std" )) |
90 | return false; |
91 | |
92 | return TD->getName() == "vector" ; |
93 | } |
94 | |
95 | static bool IsSmallVector(QualType T) { |
96 | const TemplateSpecializationType *TS = T->getAs<TemplateSpecializationType>(); |
97 | if (!TS) |
98 | return false; |
99 | |
100 | TemplateName TM = TS->getTemplateName(); |
101 | TemplateDecl *TD = TM.getAsTemplateDecl(); |
102 | |
103 | if (!TD || !InNamespace(TD, "llvm" )) |
104 | return false; |
105 | |
106 | return TD->getName() == "SmallVector" ; |
107 | } |
108 | |
109 | //===----------------------------------------------------------------------===// |
110 | // CHECK: a StringRef should not be bound to a temporary std::string whose |
111 | // lifetime is shorter than the StringRef's. |
112 | //===----------------------------------------------------------------------===// |
113 | |
114 | namespace { |
115 | class StringRefCheckerVisitor : public StmtVisitor<StringRefCheckerVisitor> { |
116 | const Decl *DeclWithIssue; |
117 | BugReporter &BR; |
118 | const CheckerBase *Checker; |
119 | |
120 | public: |
121 | StringRefCheckerVisitor(const Decl *declWithIssue, BugReporter &br, |
122 | const CheckerBase *checker) |
123 | : DeclWithIssue(declWithIssue), BR(br), Checker(checker) {} |
124 | void VisitChildren(Stmt *S) { |
125 | for (Stmt *Child : S->children()) |
126 | if (Child) |
127 | Visit(Child); |
128 | } |
129 | void VisitStmt(Stmt *S) { VisitChildren(S); } |
130 | void VisitDeclStmt(DeclStmt *DS); |
131 | private: |
132 | void VisitVarDecl(VarDecl *VD); |
133 | }; |
134 | } // end anonymous namespace |
135 | |
136 | static void CheckStringRefAssignedTemporary(const Decl *D, BugReporter &BR, |
137 | const CheckerBase *Checker) { |
138 | StringRefCheckerVisitor walker(D, BR, Checker); |
139 | walker.Visit(D->getBody()); |
140 | } |
141 | |
142 | void StringRefCheckerVisitor::VisitDeclStmt(DeclStmt *S) { |
143 | VisitChildren(S); |
144 | |
145 | for (auto *I : S->decls()) |
146 | if (VarDecl *VD = dyn_cast<VarDecl>(Val: I)) |
147 | VisitVarDecl(VD); |
148 | } |
149 | |
150 | void StringRefCheckerVisitor::VisitVarDecl(VarDecl *VD) { |
151 | Expr *Init = VD->getInit(); |
152 | if (!Init) |
153 | return; |
154 | |
155 | // Pattern match for: |
156 | // StringRef x = call() (where call returns std::string) |
157 | if (!IsLLVMStringRef(VD->getType())) |
158 | return; |
159 | ExprWithCleanups *Ex1 = dyn_cast<ExprWithCleanups>(Val: Init); |
160 | if (!Ex1) |
161 | return; |
162 | CXXConstructExpr *Ex2 = dyn_cast<CXXConstructExpr>(Ex1->getSubExpr()); |
163 | if (!Ex2 || Ex2->getNumArgs() != 1) |
164 | return; |
165 | ImplicitCastExpr *Ex3 = dyn_cast<ImplicitCastExpr>(Val: Ex2->getArg(Arg: 0)); |
166 | if (!Ex3) |
167 | return; |
168 | CXXConstructExpr *Ex4 = dyn_cast<CXXConstructExpr>(Ex3->getSubExpr()); |
169 | if (!Ex4 || Ex4->getNumArgs() != 1) |
170 | return; |
171 | ImplicitCastExpr *Ex5 = dyn_cast<ImplicitCastExpr>(Val: Ex4->getArg(Arg: 0)); |
172 | if (!Ex5) |
173 | return; |
174 | CXXBindTemporaryExpr *Ex6 = dyn_cast<CXXBindTemporaryExpr>(Ex5->getSubExpr()); |
175 | if (!Ex6 || !IsStdString(Ex6->getType())) |
176 | return; |
177 | |
178 | // Okay, badness! Report an error. |
179 | const char *desc = "StringRef should not be bound to temporary " |
180 | "std::string that it outlives" ; |
181 | PathDiagnosticLocation VDLoc = |
182 | PathDiagnosticLocation::createBegin(VD, BR.getSourceManager()); |
183 | BR.EmitBasicReport(DeclWithIssue, Checker, desc, "LLVM Conventions" , desc, |
184 | VDLoc, Init->getSourceRange()); |
185 | } |
186 | |
187 | //===----------------------------------------------------------------------===// |
188 | // CHECK: Clang AST nodes should not have fields that can allocate |
189 | // memory. |
190 | //===----------------------------------------------------------------------===// |
191 | |
192 | static bool AllocatesMemory(QualType T) { |
193 | return IsStdVector(T) || IsStdString(T) || IsSmallVector(T); |
194 | } |
195 | |
196 | // This type checking could be sped up via dynamic programming. |
197 | static bool IsPartOfAST(const CXXRecordDecl *R) { |
198 | if (IsClangStmt(R) || IsClangType(R) || IsClangDecl(R) || IsClangAttr(R)) |
199 | return true; |
200 | |
201 | for (const auto &BS : R->bases()) { |
202 | QualType T = BS.getType(); |
203 | if (const RecordType *baseT = T->getAs<RecordType>()) { |
204 | CXXRecordDecl *baseD = cast<CXXRecordDecl>(Val: baseT->getDecl()); |
205 | if (IsPartOfAST(R: baseD)) |
206 | return true; |
207 | } |
208 | } |
209 | |
210 | return false; |
211 | } |
212 | |
213 | namespace { |
214 | class ASTFieldVisitor { |
215 | SmallVector<FieldDecl*, 10> FieldChain; |
216 | const CXXRecordDecl *Root; |
217 | BugReporter &BR; |
218 | const CheckerBase *Checker; |
219 | |
220 | public: |
221 | ASTFieldVisitor(const CXXRecordDecl *root, BugReporter &br, |
222 | const CheckerBase *checker) |
223 | : Root(root), BR(br), Checker(checker) {} |
224 | |
225 | void Visit(FieldDecl *D); |
226 | void ReportError(QualType T); |
227 | }; |
228 | } // end anonymous namespace |
229 | |
230 | static void CheckASTMemory(const CXXRecordDecl *R, BugReporter &BR, |
231 | const CheckerBase *Checker) { |
232 | if (!IsPartOfAST(R)) |
233 | return; |
234 | |
235 | for (auto *I : R->fields()) { |
236 | ASTFieldVisitor walker(R, BR, Checker); |
237 | walker.Visit(I); |
238 | } |
239 | } |
240 | |
241 | void ASTFieldVisitor::Visit(FieldDecl *D) { |
242 | FieldChain.push_back(Elt: D); |
243 | |
244 | QualType T = D->getType(); |
245 | |
246 | if (AllocatesMemory(T)) |
247 | ReportError(T); |
248 | |
249 | if (const RecordType *RT = T->getAs<RecordType>()) { |
250 | const RecordDecl *RD = RT->getDecl()->getDefinition(); |
251 | for (auto *I : RD->fields()) |
252 | Visit(I); |
253 | } |
254 | |
255 | FieldChain.pop_back(); |
256 | } |
257 | |
258 | void ASTFieldVisitor::ReportError(QualType T) { |
259 | SmallString<1024> buf; |
260 | llvm::raw_svector_ostream os(buf); |
261 | |
262 | os << "AST class '" << Root->getName() << "' has a field '" |
263 | << FieldChain.front()->getName() << "' that allocates heap memory" ; |
264 | if (FieldChain.size() > 1) { |
265 | os << " via the following chain: " ; |
266 | bool isFirst = true; |
267 | for (SmallVectorImpl<FieldDecl*>::iterator I=FieldChain.begin(), |
268 | E=FieldChain.end(); I!=E; ++I) { |
269 | if (!isFirst) |
270 | os << '.'; |
271 | else |
272 | isFirst = false; |
273 | os << (*I)->getName(); |
274 | } |
275 | } |
276 | os << " (type " << FieldChain.back()->getType() << ")" ; |
277 | |
278 | // Note that this will fire for every translation unit that uses this |
279 | // class. This is suboptimal, but at least scan-build will merge |
280 | // duplicate HTML reports. In the future we need a unified way of merging |
281 | // duplicate reports across translation units. For C++ classes we cannot |
282 | // just report warnings when we see an out-of-line method definition for a |
283 | // class, as that heuristic doesn't always work (the complete definition of |
284 | // the class may be in the header file, for example). |
285 | PathDiagnosticLocation L = PathDiagnosticLocation::createBegin( |
286 | FieldChain.front(), BR.getSourceManager()); |
287 | BR.EmitBasicReport(Root, Checker, "AST node allocates heap memory" , |
288 | "LLVM Conventions" , os.str(), L); |
289 | } |
290 | |
291 | //===----------------------------------------------------------------------===// |
292 | // LLVMConventionsChecker |
293 | //===----------------------------------------------------------------------===// |
294 | |
295 | namespace { |
296 | class LLVMConventionsChecker : public Checker< |
297 | check::ASTDecl<CXXRecordDecl>, |
298 | check::ASTCodeBody > { |
299 | public: |
300 | void checkASTDecl(const CXXRecordDecl *R, AnalysisManager& mgr, |
301 | BugReporter &BR) const { |
302 | if (R->isCompleteDefinition()) |
303 | CheckASTMemory(R, BR, Checker: this); |
304 | } |
305 | |
306 | void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, |
307 | BugReporter &BR) const { |
308 | CheckStringRefAssignedTemporary(D, BR, Checker: this); |
309 | } |
310 | }; |
311 | } |
312 | |
313 | void ento::registerLLVMConventionsChecker(CheckerManager &mgr) { |
314 | mgr.registerChecker<LLVMConventionsChecker>(); |
315 | } |
316 | |
317 | bool ento::shouldRegisterLLVMConventionsChecker(const CheckerManager &mgr) { |
318 | return true; |
319 | } |
320 | |