1 | //===-- DereferenceChecker.cpp - Null dereference checker -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This defines DereferenceChecker, a builtin check in ExprEngine that performs
// checks for null and undefined pointers at loads, stores and reference binds.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/AST/ExprObjC.h" |
15 | #include "clang/AST/ExprOpenMP.h" |
16 | #include "clang/Basic/TargetInfo.h" |
17 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
18 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
19 | #include "clang/StaticAnalyzer/Core/Checker.h" |
20 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
21 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
22 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" |
23 | #include "llvm/ADT/SmallString.h" |
24 | #include "llvm/Support/raw_ostream.h" |
25 | |
26 | using namespace clang; |
27 | using namespace ento; |
28 | |
29 | namespace { |
30 | class DereferenceChecker |
31 | : public Checker< check::Location, |
32 | check::Bind, |
33 | EventDispatcher<ImplicitNullDerefEvent> > { |
34 | enum DerefKind { NullPointer, UndefinedPointerValue }; |
35 | |
36 | BugType BT_Null{this, "Dereference of null pointer" , categories::LogicError}; |
37 | BugType BT_Undef{this, "Dereference of undefined pointer value" , |
38 | categories::LogicError}; |
39 | |
40 | void reportBug(DerefKind K, ProgramStateRef State, const Stmt *S, |
41 | CheckerContext &C) const; |
42 | |
43 | bool suppressReport(CheckerContext &C, const Expr *E) const; |
44 | |
45 | public: |
46 | void checkLocation(SVal location, bool isLoad, const Stmt* S, |
47 | CheckerContext &C) const; |
48 | void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; |
49 | |
50 | static void AddDerefSource(raw_ostream &os, |
51 | SmallVectorImpl<SourceRange> &Ranges, |
52 | const Expr *Ex, const ProgramState *state, |
53 | const LocationContext *LCtx, |
54 | bool loadedFrom = false); |
55 | |
56 | bool SuppressAddressSpaces = false; |
57 | }; |
58 | } // end anonymous namespace |
59 | |
60 | void |
61 | DereferenceChecker::AddDerefSource(raw_ostream &os, |
62 | SmallVectorImpl<SourceRange> &Ranges, |
63 | const Expr *Ex, |
64 | const ProgramState *state, |
65 | const LocationContext *LCtx, |
66 | bool loadedFrom) { |
67 | Ex = Ex->IgnoreParenLValueCasts(); |
68 | switch (Ex->getStmtClass()) { |
69 | default: |
70 | break; |
71 | case Stmt::DeclRefExprClass: { |
72 | const DeclRefExpr *DR = cast<DeclRefExpr>(Val: Ex); |
73 | if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DR->getDecl())) { |
74 | os << " (" << (loadedFrom ? "loaded from" : "from" ) |
75 | << " variable '" << VD->getName() << "')" ; |
76 | Ranges.push_back(Elt: DR->getSourceRange()); |
77 | } |
78 | break; |
79 | } |
80 | case Stmt::MemberExprClass: { |
81 | const MemberExpr *ME = cast<MemberExpr>(Val: Ex); |
82 | os << " (" << (loadedFrom ? "loaded from" : "via" ) |
83 | << " field '" << ME->getMemberNameInfo() << "')" ; |
84 | SourceLocation L = ME->getMemberLoc(); |
85 | Ranges.push_back(Elt: SourceRange(L, L)); |
86 | break; |
87 | } |
88 | case Stmt::ObjCIvarRefExprClass: { |
89 | const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Val: Ex); |
90 | os << " (" << (loadedFrom ? "loaded from" : "via" ) |
91 | << " ivar '" << IV->getDecl()->getName() << "')" ; |
92 | SourceLocation L = IV->getLocation(); |
93 | Ranges.push_back(Elt: SourceRange(L, L)); |
94 | break; |
95 | } |
96 | } |
97 | } |
98 | |
99 | static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false){ |
100 | const Expr *E = nullptr; |
101 | |
102 | // Walk through lvalue casts to get the original expression |
103 | // that syntactically caused the load. |
104 | if (const Expr *expr = dyn_cast<Expr>(Val: S)) |
105 | E = expr->IgnoreParenLValueCasts(); |
106 | |
107 | if (IsBind) { |
108 | const VarDecl *VD; |
109 | const Expr *Init; |
110 | std::tie(args&: VD, args&: Init) = parseAssignment(S); |
111 | if (VD && Init) |
112 | E = Init; |
113 | } |
114 | return E; |
115 | } |
116 | |
117 | bool DereferenceChecker::suppressReport(CheckerContext &C, |
118 | const Expr *E) const { |
119 | // Do not report dereferences on memory that use address space #256, #257, |
120 | // and #258. Those address spaces are used when dereferencing address spaces |
121 | // relative to the GS, FS, and SS segments on x86/x86-64 targets. |
122 | // Dereferencing a null pointer in these address spaces is not defined |
123 | // as an error. All other null dereferences in other address spaces |
124 | // are defined as an error unless explicitly defined. |
125 | // See https://clang.llvm.org/docs/LanguageExtensions.html, the section |
126 | // "X86/X86-64 Language Extensions" |
127 | |
128 | QualType Ty = E->getType(); |
129 | if (!Ty.hasAddressSpace()) |
130 | return false; |
131 | if (SuppressAddressSpaces) |
132 | return true; |
133 | |
134 | const llvm::Triple::ArchType Arch = |
135 | C.getASTContext().getTargetInfo().getTriple().getArch(); |
136 | |
137 | if ((Arch == llvm::Triple::x86) || (Arch == llvm::Triple::x86_64)) { |
138 | switch (toTargetAddressSpace(AS: E->getType().getAddressSpace())) { |
139 | case 256: |
140 | case 257: |
141 | case 258: |
142 | return true; |
143 | } |
144 | } |
145 | return false; |
146 | } |
147 | |
148 | static bool isDeclRefExprToReference(const Expr *E) { |
149 | if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: E)) |
150 | return DRE->getDecl()->getType()->isReferenceType(); |
151 | return false; |
152 | } |
153 | |
154 | void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State, |
155 | const Stmt *S, CheckerContext &C) const { |
156 | const BugType *BT = nullptr; |
157 | llvm::StringRef DerefStr1; |
158 | llvm::StringRef DerefStr2; |
159 | switch (K) { |
160 | case DerefKind::NullPointer: |
161 | BT = &BT_Null; |
162 | DerefStr1 = " results in a null pointer dereference" ; |
163 | DerefStr2 = " results in a dereference of a null pointer" ; |
164 | break; |
165 | case DerefKind::UndefinedPointerValue: |
166 | BT = &BT_Undef; |
167 | DerefStr1 = " results in an undefined pointer dereference" ; |
168 | DerefStr2 = " results in a dereference of an undefined pointer value" ; |
169 | break; |
170 | }; |
171 | |
172 | // Generate an error node. |
173 | ExplodedNode *N = C.generateErrorNode(State); |
174 | if (!N) |
175 | return; |
176 | |
177 | SmallString<100> buf; |
178 | llvm::raw_svector_ostream os(buf); |
179 | |
180 | SmallVector<SourceRange, 2> Ranges; |
181 | |
182 | switch (S->getStmtClass()) { |
183 | case Stmt::ArraySubscriptExprClass: { |
184 | os << "Array access" ; |
185 | const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(Val: S); |
186 | AddDerefSource(os, Ranges, Ex: AE->getBase()->IgnoreParenCasts(), |
187 | state: State.get(), LCtx: N->getLocationContext()); |
188 | os << DerefStr1; |
189 | break; |
190 | } |
191 | case Stmt::OMPArraySectionExprClass: { |
192 | os << "Array access" ; |
193 | const OMPArraySectionExpr *AE = cast<OMPArraySectionExpr>(Val: S); |
194 | AddDerefSource(os, Ranges, Ex: AE->getBase()->IgnoreParenCasts(), |
195 | state: State.get(), LCtx: N->getLocationContext()); |
196 | os << DerefStr1; |
197 | break; |
198 | } |
199 | case Stmt::UnaryOperatorClass: { |
200 | os << BT->getDescription(); |
201 | const UnaryOperator *U = cast<UnaryOperator>(Val: S); |
202 | AddDerefSource(os, Ranges, Ex: U->getSubExpr()->IgnoreParens(), |
203 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
204 | break; |
205 | } |
206 | case Stmt::MemberExprClass: { |
207 | const MemberExpr *M = cast<MemberExpr>(Val: S); |
208 | if (M->isArrow() || isDeclRefExprToReference(E: M->getBase())) { |
209 | os << "Access to field '" << M->getMemberNameInfo() << "'" << DerefStr2; |
210 | AddDerefSource(os, Ranges, Ex: M->getBase()->IgnoreParenCasts(), |
211 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
212 | } |
213 | break; |
214 | } |
215 | case Stmt::ObjCIvarRefExprClass: { |
216 | const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Val: S); |
217 | os << "Access to instance variable '" << *IV->getDecl() << "'" << DerefStr2; |
218 | AddDerefSource(os, Ranges, Ex: IV->getBase()->IgnoreParenCasts(), |
219 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
220 | break; |
221 | } |
222 | default: |
223 | break; |
224 | } |
225 | |
226 | auto report = std::make_unique<PathSensitiveBugReport>( |
227 | args: *BT, args: buf.empty() ? BT->getDescription() : buf.str(), args&: N); |
228 | |
229 | bugreporter::trackExpressionValue(N, E: bugreporter::getDerefExpr(S), R&: *report); |
230 | |
231 | for (SmallVectorImpl<SourceRange>::iterator |
232 | I = Ranges.begin(), E = Ranges.end(); I!=E; ++I) |
233 | report->addRange(R: *I); |
234 | |
235 | C.emitReport(R: std::move(report)); |
236 | } |
237 | |
238 | void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S, |
239 | CheckerContext &C) const { |
240 | // Check for dereference of an undefined value. |
241 | if (l.isUndef()) { |
242 | const Expr *DerefExpr = getDereferenceExpr(S); |
243 | if (!suppressReport(C, E: DerefExpr)) |
244 | reportBug(DerefKind::UndefinedPointerValue, C.getState(), DerefExpr, C); |
245 | return; |
246 | } |
247 | |
248 | DefinedOrUnknownSVal location = l.castAs<DefinedOrUnknownSVal>(); |
249 | |
250 | // Check for null dereferences. |
251 | if (!isa<Loc>(Val: location)) |
252 | return; |
253 | |
254 | ProgramStateRef state = C.getState(); |
255 | |
256 | ProgramStateRef notNullState, nullState; |
257 | std::tie(args&: notNullState, args&: nullState) = state->assume(Cond: location); |
258 | |
259 | if (nullState) { |
260 | if (!notNullState) { |
261 | // We know that 'location' can only be null. This is what |
262 | // we call an "explicit" null dereference. |
263 | const Expr *expr = getDereferenceExpr(S); |
264 | if (!suppressReport(C, E: expr)) { |
265 | reportBug(DerefKind::NullPointer, nullState, expr, C); |
266 | return; |
267 | } |
268 | } |
269 | |
270 | // Otherwise, we have the case where the location could either be |
271 | // null or not-null. Record the error node as an "implicit" null |
272 | // dereference. |
273 | if (ExplodedNode *N = C.generateSink(State: nullState, Pred: C.getPredecessor())) { |
274 | ImplicitNullDerefEvent event = {.Location: l, .IsLoad: isLoad, .SinkNode: N, .BR: &C.getBugReporter(), |
275 | /*IsDirectDereference=*/true}; |
276 | dispatchEvent(event); |
277 | } |
278 | } |
279 | |
280 | // From this point forward, we know that the location is not null. |
281 | C.addTransition(State: notNullState); |
282 | } |
283 | |
284 | void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S, |
285 | CheckerContext &C) const { |
286 | // If we're binding to a reference, check if the value is known to be null. |
287 | if (V.isUndef()) |
288 | return; |
289 | |
290 | const MemRegion *MR = L.getAsRegion(); |
291 | const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(Val: MR); |
292 | if (!TVR) |
293 | return; |
294 | |
295 | if (!TVR->getValueType()->isReferenceType()) |
296 | return; |
297 | |
298 | ProgramStateRef State = C.getState(); |
299 | |
300 | ProgramStateRef StNonNull, StNull; |
301 | std::tie(args&: StNonNull, args&: StNull) = State->assume(Cond: V.castAs<DefinedOrUnknownSVal>()); |
302 | |
303 | if (StNull) { |
304 | if (!StNonNull) { |
305 | const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true); |
306 | if (!suppressReport(C, E: expr)) { |
307 | reportBug(DerefKind::NullPointer, StNull, expr, C); |
308 | return; |
309 | } |
310 | } |
311 | |
312 | // At this point the value could be either null or non-null. |
313 | // Record this as an "implicit" null dereference. |
314 | if (ExplodedNode *N = C.generateSink(State: StNull, Pred: C.getPredecessor())) { |
315 | ImplicitNullDerefEvent event = {.Location: V, /*isLoad=*/.IsLoad: true, .SinkNode: N, |
316 | .BR: &C.getBugReporter(), |
317 | /*IsDirectDereference=*/true}; |
318 | dispatchEvent(event); |
319 | } |
320 | } |
321 | |
322 | // Unlike a regular null dereference, initializing a reference with a |
323 | // dereferenced null pointer does not actually cause a runtime exception in |
324 | // Clang's implementation of references. |
325 | // |
326 | // int &r = *p; // safe?? |
327 | // if (p != NULL) return; // uh-oh |
328 | // r = 5; // trap here |
329 | // |
330 | // The standard says this is invalid as soon as we try to create a "null |
331 | // reference" (there is no such thing), but turning this into an assumption |
332 | // that 'p' is never null will not match our actual runtime behavior. |
333 | // So we do not record this assumption, allowing us to warn on the last line |
334 | // of this example. |
335 | // |
336 | // We do need to add a transition because we may have generated a sink for |
337 | // the "implicit" null dereference. |
338 | C.addTransition(State, Tag: this); |
339 | } |
340 | |
341 | void ento::registerDereferenceChecker(CheckerManager &mgr) { |
342 | auto *Chk = mgr.registerChecker<DereferenceChecker>(); |
343 | Chk->SuppressAddressSpaces = mgr.getAnalyzerOptions().getCheckerBooleanOption( |
344 | CheckerName: mgr.getCurrentCheckerName(), OptionName: "SuppressAddressSpaces" ); |
345 | } |
346 | |
347 | bool ento::shouldRegisterDereferenceChecker(const CheckerManager &mgr) { |
348 | return true; |
349 | } |
350 | |