1 | //=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file defines PointerArithChecker, a builtin checker that checks for
// pointer arithmetic on locations other than array elements.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/AST/DeclCXX.h" |
15 | #include "clang/AST/ExprCXX.h" |
16 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
17 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
18 | #include "clang/StaticAnalyzer/Core/Checker.h" |
19 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
20 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
21 | #include "llvm/ADT/StringRef.h" |
22 | |
23 | using namespace clang; |
24 | using namespace ento; |
25 | |
namespace {
/// What this checker has recorded about the memory behind a region.
enum class AllocKind {
  /// Recorded for a non-array new-expression whose allocation function is
  /// not a member, takes exactly one parameter and is not variadic.
  SingleObject,
  /// Recorded for array new-expressions, for the results of the C allocation
  /// functions the checker knows, and for regions that undergo
  /// array-to-pointer decay.
  Array,
  /// Nothing is assumed about the allocation.
  Unknown,
  /// Single object interpreted as an array.
  Reinterpreted
};
} // end namespace
34 | |
35 | namespace llvm { |
36 | template <> struct FoldingSetTrait<AllocKind> { |
37 | static inline void Profile(AllocKind X, FoldingSetNodeID &ID) { |
38 | ID.AddInteger(I: static_cast<int>(X)); |
39 | } |
40 | }; |
41 | } // end namespace llvm |
42 | |
43 | namespace { |
44 | class PointerArithChecker |
45 | : public Checker< |
46 | check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>, |
47 | check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>, |
48 | check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>, |
49 | check::PostStmt<CallExpr>, check::DeadSymbols> { |
50 | AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const; |
51 | const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic, |
52 | AllocKind &AKind, CheckerContext &C) const; |
53 | const MemRegion *getPointedRegion(const MemRegion *Region, |
54 | CheckerContext &C) const; |
55 | void reportPointerArithMisuse(const Expr *E, CheckerContext &C, |
56 | bool PointedNeeded = false) const; |
57 | void initAllocIdentifiers(ASTContext &C) const; |
58 | |
59 | const BugType BT_pointerArith{this, "Dangerous pointer arithmetic" }; |
60 | const BugType BT_polyArray{this, "Dangerous pointer arithmetic" }; |
61 | mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions; |
62 | |
63 | public: |
64 | void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const; |
65 | void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const; |
66 | void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const; |
67 | void checkPreStmt(const CastExpr *CE, CheckerContext &C) const; |
68 | void checkPostStmt(const CastExpr *CE, CheckerContext &C) const; |
69 | void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const; |
70 | void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; |
71 | void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; |
72 | }; |
73 | } // end namespace |
74 | |
75 | REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind) |
76 | |
77 | void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR, |
78 | CheckerContext &C) const { |
79 | // TODO: intentional leak. Some information is garbage collected too early, |
80 | // see http://reviews.llvm.org/D14203 for further information. |
81 | /*ProgramStateRef State = C.getState(); |
82 | RegionStateTy RegionStates = State->get<RegionState>(); |
83 | for (const MemRegion *Reg: llvm::make_first_range(RegionStates)) { |
84 | if (!SR.isLiveRegion(Reg)) |
85 | State = State->remove<RegionState>(Reg); |
86 | } |
87 | C.addTransition(State);*/ |
88 | } |
89 | |
90 | AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE, |
91 | const FunctionDecl *FD) const { |
92 | // This checker try not to assume anything about placement and overloaded |
93 | // new to avoid false positives. |
94 | if (isa<CXXMethodDecl>(Val: FD)) |
95 | return AllocKind::Unknown; |
96 | if (FD->getNumParams() != 1 || FD->isVariadic()) |
97 | return AllocKind::Unknown; |
98 | if (NE->isArray()) |
99 | return AllocKind::Array; |
100 | |
101 | return AllocKind::SingleObject; |
102 | } |
103 | |
104 | const MemRegion * |
105 | PointerArithChecker::getPointedRegion(const MemRegion *Region, |
106 | CheckerContext &C) const { |
107 | assert(Region); |
108 | ProgramStateRef State = C.getState(); |
109 | SVal S = State->getSVal(R: Region); |
110 | return S.getAsRegion(); |
111 | } |
112 | |
113 | /// Checks whether a region is the part of an array. |
114 | /// In case there is a derived to base cast above the array element, the |
115 | /// Polymorphic output value is set to true. AKind output value is set to the |
116 | /// allocation kind of the inspected region. |
117 | const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region, |
118 | bool &Polymorphic, |
119 | AllocKind &AKind, |
120 | CheckerContext &C) const { |
121 | assert(Region); |
122 | while (const auto *BaseRegion = dyn_cast<CXXBaseObjectRegion>(Val: Region)) { |
123 | Region = BaseRegion->getSuperRegion(); |
124 | Polymorphic = true; |
125 | } |
126 | if (const auto *ElemRegion = dyn_cast<ElementRegion>(Val: Region)) { |
127 | Region = ElemRegion->getSuperRegion(); |
128 | } |
129 | |
130 | ProgramStateRef State = C.getState(); |
131 | if (const AllocKind *Kind = State->get<RegionState>(key: Region)) { |
132 | AKind = *Kind; |
133 | if (*Kind == AllocKind::Array) |
134 | return Region; |
135 | else |
136 | return nullptr; |
137 | } |
138 | // When the region is symbolic and we do not have any information about it, |
139 | // assume that this is an array to avoid false positives. |
140 | if (isa<SymbolicRegion>(Val: Region)) |
141 | return Region; |
142 | |
143 | // No AllocKind stored and not symbolic, assume that it points to a single |
144 | // object. |
145 | return nullptr; |
146 | } |
147 | |
148 | void PointerArithChecker::reportPointerArithMisuse(const Expr *E, |
149 | CheckerContext &C, |
150 | bool PointedNeeded) const { |
151 | SourceRange SR = E->getSourceRange(); |
152 | if (SR.isInvalid()) |
153 | return; |
154 | |
155 | ProgramStateRef State = C.getState(); |
156 | const MemRegion *Region = C.getSVal(E).getAsRegion(); |
157 | if (!Region) |
158 | return; |
159 | if (PointedNeeded) |
160 | Region = getPointedRegion(Region, C); |
161 | if (!Region) |
162 | return; |
163 | |
164 | bool IsPolymorphic = false; |
165 | AllocKind Kind = AllocKind::Unknown; |
166 | if (const MemRegion *ArrayRegion = |
167 | getArrayRegion(Region, Polymorphic&: IsPolymorphic, AKind&: Kind, C)) { |
168 | if (!IsPolymorphic) |
169 | return; |
170 | if (ExplodedNode *N = C.generateNonFatalErrorNode()) { |
171 | constexpr llvm::StringLiteral Msg = |
172 | "Pointer arithmetic on a pointer to base class is dangerous " |
173 | "because derived and base class may have different size." ; |
174 | auto R = std::make_unique<PathSensitiveBugReport>(args: BT_polyArray, args: Msg, args&: N); |
175 | R->addRange(R: E->getSourceRange()); |
176 | R->markInteresting(R: ArrayRegion); |
177 | C.emitReport(R: std::move(R)); |
178 | } |
179 | return; |
180 | } |
181 | |
182 | if (Kind == AllocKind::Reinterpreted) |
183 | return; |
184 | |
185 | // We might not have enough information about symbolic regions. |
186 | if (Kind != AllocKind::SingleObject && |
187 | Region->getKind() == MemRegion::Kind::SymbolicRegionKind) |
188 | return; |
189 | |
190 | if (ExplodedNode *N = C.generateNonFatalErrorNode()) { |
191 | constexpr llvm::StringLiteral Msg = |
192 | "Pointer arithmetic on non-array variables relies on memory layout, " |
193 | "which is dangerous." ; |
194 | auto R = std::make_unique<PathSensitiveBugReport>(args: BT_pointerArith, args: Msg, args&: N); |
195 | R->addRange(R: SR); |
196 | R->markInteresting(R: Region); |
197 | C.emitReport(R: std::move(R)); |
198 | } |
199 | } |
200 | |
201 | void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const { |
202 | if (!AllocFunctions.empty()) |
203 | return; |
204 | AllocFunctions.insert(Ptr: &C.Idents.get(Name: "alloca" )); |
205 | AllocFunctions.insert(Ptr: &C.Idents.get(Name: "malloc" )); |
206 | AllocFunctions.insert(Ptr: &C.Idents.get(Name: "realloc" )); |
207 | AllocFunctions.insert(Ptr: &C.Idents.get(Name: "calloc" )); |
208 | AllocFunctions.insert(Ptr: &C.Idents.get(Name: "valloc" )); |
209 | } |
210 | |
211 | void PointerArithChecker::checkPostStmt(const CallExpr *CE, |
212 | CheckerContext &C) const { |
213 | ProgramStateRef State = C.getState(); |
214 | const FunctionDecl *FD = C.getCalleeDecl(CE); |
215 | if (!FD) |
216 | return; |
217 | IdentifierInfo *FunI = FD->getIdentifier(); |
218 | initAllocIdentifiers(C&: C.getASTContext()); |
219 | if (AllocFunctions.count(Ptr: FunI) == 0) |
220 | return; |
221 | |
222 | SVal SV = C.getSVal(CE); |
223 | const MemRegion *Region = SV.getAsRegion(); |
224 | if (!Region) |
225 | return; |
226 | // Assume that C allocation functions allocate arrays to avoid false |
227 | // positives. |
228 | // TODO: Add heuristics to distinguish alloc calls that allocates single |
229 | // objecs. |
230 | State = State->set<RegionState>(K: Region, E: AllocKind::Array); |
231 | C.addTransition(State); |
232 | } |
233 | |
234 | void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE, |
235 | CheckerContext &C) const { |
236 | const FunctionDecl *FD = NE->getOperatorNew(); |
237 | if (!FD) |
238 | return; |
239 | |
240 | AllocKind Kind = getKindOfNewOp(NE, FD); |
241 | |
242 | ProgramStateRef State = C.getState(); |
243 | SVal AllocedVal = C.getSVal(NE); |
244 | const MemRegion *Region = AllocedVal.getAsRegion(); |
245 | if (!Region) |
246 | return; |
247 | State = State->set<RegionState>(K: Region, E: Kind); |
248 | C.addTransition(State); |
249 | } |
250 | |
251 | void PointerArithChecker::checkPostStmt(const CastExpr *CE, |
252 | CheckerContext &C) const { |
253 | if (CE->getCastKind() != CastKind::CK_BitCast) |
254 | return; |
255 | |
256 | const Expr *CastedExpr = CE->getSubExpr(); |
257 | ProgramStateRef State = C.getState(); |
258 | SVal CastedVal = C.getSVal(CastedExpr); |
259 | |
260 | const MemRegion *Region = CastedVal.getAsRegion(); |
261 | if (!Region) |
262 | return; |
263 | |
264 | // Suppress reinterpret casted hits. |
265 | State = State->set<RegionState>(K: Region, E: AllocKind::Reinterpreted); |
266 | C.addTransition(State); |
267 | } |
268 | |
269 | void PointerArithChecker::checkPreStmt(const CastExpr *CE, |
270 | CheckerContext &C) const { |
271 | if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay) |
272 | return; |
273 | |
274 | const Expr *CastedExpr = CE->getSubExpr(); |
275 | ProgramStateRef State = C.getState(); |
276 | SVal CastedVal = C.getSVal(CastedExpr); |
277 | |
278 | const MemRegion *Region = CastedVal.getAsRegion(); |
279 | if (!Region) |
280 | return; |
281 | |
282 | if (const AllocKind *Kind = State->get<RegionState>(key: Region)) { |
283 | if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted) |
284 | return; |
285 | } |
286 | State = State->set<RegionState>(K: Region, E: AllocKind::Array); |
287 | C.addTransition(State); |
288 | } |
289 | |
290 | void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp, |
291 | CheckerContext &C) const { |
292 | if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType()) |
293 | return; |
294 | reportPointerArithMisuse(E: UOp->getSubExpr(), C, PointedNeeded: true); |
295 | } |
296 | |
297 | void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr, |
298 | CheckerContext &C) const { |
299 | SVal Idx = C.getSVal(SubsExpr->getIdx()); |
300 | |
301 | // Indexing with 0 is OK. |
302 | if (Idx.isZeroConstant()) |
303 | return; |
304 | |
305 | // Indexing vector-type expressions is also OK. |
306 | if (SubsExpr->getBase()->getType()->isVectorType()) |
307 | return; |
308 | reportPointerArithMisuse(E: SubsExpr->getBase(), C); |
309 | } |
310 | |
311 | void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp, |
312 | CheckerContext &C) const { |
313 | BinaryOperatorKind OpKind = BOp->getOpcode(); |
314 | if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign) |
315 | return; |
316 | |
317 | const Expr *Lhs = BOp->getLHS(); |
318 | const Expr *Rhs = BOp->getRHS(); |
319 | ProgramStateRef State = C.getState(); |
320 | |
321 | if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) { |
322 | SVal RHSVal = C.getSVal(Rhs); |
323 | if (State->isNull(V: RHSVal).isConstrainedTrue()) |
324 | return; |
325 | reportPointerArithMisuse(E: Lhs, C, PointedNeeded: !BOp->isAdditiveOp()); |
326 | } |
327 | // The int += ptr; case is not valid C++. |
328 | if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) { |
329 | SVal LHSVal = C.getSVal(Lhs); |
330 | if (State->isNull(V: LHSVal).isConstrainedTrue()) |
331 | return; |
332 | reportPointerArithMisuse(E: Rhs, C); |
333 | } |
334 | } |
335 | |
336 | void ento::registerPointerArithChecker(CheckerManager &mgr) { |
337 | mgr.registerChecker<PointerArithChecker>(); |
338 | } |
339 | |
340 | bool ento::shouldRegisterPointerArithChecker(const CheckerManager &mgr) { |
341 | return true; |
342 | } |
343 | |