1 | //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a check that marks a raw pointer to a C++ container's |
10 | // inner buffer released when the object is destroyed. This information can |
11 | // be used by MallocChecker to detect use-after-free problems. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AllocationState.h" |
16 | #include "InterCheckerAPI.h" |
17 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
18 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
19 | #include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" |
20 | #include "clang/StaticAnalyzer/Core/Checker.h" |
21 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
22 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
23 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
24 | |
25 | using namespace clang; |
26 | using namespace ento; |
27 | |
// Program-state traits used by this checker.
//  - PtrSet:    a set of raw pointer symbols.
//  - RawPtrMap: associates the memory region of a container object with the
//               set of raw pointer symbols recorded for it.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef)
REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
31 | |
32 | |
33 | namespace { |
34 | |
35 | class InnerPointerChecker |
36 | : public Checker<check::DeadSymbols, check::PostCall> { |
37 | |
38 | CallDescription AppendFn, AssignFn, AddressofFn, AddressofFn_, ClearFn, |
39 | CStrFn, DataFn, DataMemberFn, EraseFn, InsertFn, PopBackFn, PushBackFn, |
40 | ReplaceFn, ReserveFn, ResizeFn, ShrinkToFitFn, SwapFn; |
41 | |
42 | public: |
43 | class InnerPointerBRVisitor : public BugReporterVisitor { |
44 | SymbolRef PtrToBuf; |
45 | |
46 | public: |
47 | InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {} |
48 | |
49 | static void *getTag() { |
50 | static int Tag = 0; |
51 | return &Tag; |
52 | } |
53 | |
54 | void Profile(llvm::FoldingSetNodeID &ID) const override { |
55 | ID.AddPointer(Ptr: getTag()); |
56 | } |
57 | |
58 | PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, |
59 | BugReporterContext &BRC, |
60 | PathSensitiveBugReport &BR) override; |
61 | |
62 | // FIXME: Scan the map once in the visitor's constructor and do a direct |
63 | // lookup by region. |
64 | bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) { |
65 | RawPtrMapTy Map = State->get<RawPtrMap>(); |
66 | for (const auto &Entry : Map) { |
67 | if (Entry.second.contains(V: Sym)) |
68 | return true; |
69 | } |
70 | return false; |
71 | } |
72 | }; |
73 | |
74 | InnerPointerChecker() |
75 | : AppendFn({"std" , "basic_string" , "append" }), |
76 | AssignFn({"std" , "basic_string" , "assign" }), |
77 | AddressofFn({"std" , "addressof" }), AddressofFn_({"std" , "__addressof" }), |
78 | ClearFn({"std" , "basic_string" , "clear" }), |
79 | CStrFn({"std" , "basic_string" , "c_str" }), DataFn({"std" , "data" }, 1), |
80 | DataMemberFn({"std" , "basic_string" , "data" }), |
81 | EraseFn({"std" , "basic_string" , "erase" }), |
82 | InsertFn({"std" , "basic_string" , "insert" }), |
83 | PopBackFn({"std" , "basic_string" , "pop_back" }), |
84 | PushBackFn({"std" , "basic_string" , "push_back" }), |
85 | ReplaceFn({"std" , "basic_string" , "replace" }), |
86 | ReserveFn({"std" , "basic_string" , "reserve" }), |
87 | ResizeFn({"std" , "basic_string" , "resize" }), |
88 | ShrinkToFitFn({"std" , "basic_string" , "shrink_to_fit" }), |
89 | SwapFn({"std" , "basic_string" , "swap" }) {} |
90 | |
91 | /// Check whether the called member function potentially invalidates |
92 | /// pointers referring to the container object's inner buffer. |
93 | bool isInvalidatingMemberFunction(const CallEvent &Call) const; |
94 | |
95 | /// Check whether the called function returns a raw inner pointer. |
96 | bool isInnerPointerAccessFunction(const CallEvent &Call) const; |
97 | |
98 | /// Mark pointer symbols associated with the given memory region released |
99 | /// in the program state. |
100 | void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, |
101 | const MemRegion *ObjRegion, |
102 | CheckerContext &C) const; |
103 | |
104 | /// Standard library functions that take a non-const `basic_string` argument by |
105 | /// reference may invalidate its inner pointers. Check for these cases and |
106 | /// mark the pointers released. |
107 | void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State, |
108 | CheckerContext &C) const; |
109 | |
110 | /// Record the connection between raw pointers referring to a container |
111 | /// object's inner buffer and the object's memory region in the program state. |
112 | /// Mark potentially invalidated pointers released. |
113 | void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
114 | |
115 | /// Clean up the program state map. |
116 | void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; |
117 | }; |
118 | |
119 | } // end anonymous namespace |
120 | |
121 | bool InnerPointerChecker::isInvalidatingMemberFunction( |
122 | const CallEvent &Call) const { |
123 | if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(Val: &Call)) { |
124 | OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator(); |
125 | if (Opc == OO_Equal || Opc == OO_PlusEqual) |
126 | return true; |
127 | return false; |
128 | } |
129 | return isa<CXXDestructorCall>(Val: Call) || |
130 | matchesAny(Call, CD1: AppendFn, CDs: AssignFn, CDs: ClearFn, CDs: EraseFn, CDs: InsertFn, |
131 | CDs: PopBackFn, CDs: PushBackFn, CDs: ReplaceFn, CDs: ReserveFn, CDs: ResizeFn, |
132 | CDs: ShrinkToFitFn, CDs: SwapFn); |
133 | } |
134 | |
135 | bool InnerPointerChecker::isInnerPointerAccessFunction( |
136 | const CallEvent &Call) const { |
137 | return matchesAny(Call, CD1: CStrFn, CDs: DataFn, CDs: DataMemberFn); |
138 | } |
139 | |
140 | void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call, |
141 | ProgramStateRef State, |
142 | const MemRegion *MR, |
143 | CheckerContext &C) const { |
144 | if (const PtrSet *PS = State->get<RawPtrMap>(key: MR)) { |
145 | const Expr *Origin = Call.getOriginExpr(); |
146 | for (const auto Symbol : *PS) { |
147 | // NOTE: `Origin` may be null, and will be stored so in the symbol's |
148 | // `RefState` in MallocChecker's `RegionState` program state map. |
149 | State = allocation_state::markReleased(State, Sym: Symbol, Origin); |
150 | } |
151 | State = State->remove<RawPtrMap>(K: MR); |
152 | C.addTransition(State); |
153 | return; |
154 | } |
155 | } |
156 | |
157 | void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call, |
158 | ProgramStateRef State, |
159 | CheckerContext &C) const { |
160 | if (const auto *FC = dyn_cast<AnyFunctionCall>(Val: &Call)) { |
161 | const FunctionDecl *FD = FC->getDecl(); |
162 | if (!FD || !FD->isInStdNamespace()) |
163 | return; |
164 | |
165 | for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) { |
166 | QualType ParamTy = FD->getParamDecl(i: I)->getType(); |
167 | if (!ParamTy->isReferenceType() || |
168 | ParamTy->getPointeeType().isConstQualified()) |
169 | continue; |
170 | |
171 | // In case of member operator calls, `this` is counted as an |
172 | // argument but not as a parameter. |
173 | bool isaMemberOpCall = isa<CXXMemberOperatorCall>(Val: FC); |
174 | unsigned ArgI = isaMemberOpCall ? I+1 : I; |
175 | |
176 | SVal Arg = FC->getArgSVal(Index: ArgI); |
177 | const auto *ArgRegion = |
178 | dyn_cast_or_null<TypedValueRegion>(Val: Arg.getAsRegion()); |
179 | if (!ArgRegion) |
180 | continue; |
181 | |
182 | // std::addressof functions accepts a non-const reference as an argument, |
183 | // but doesn't modify it. |
184 | if (matchesAny(Call, CD1: AddressofFn, CDs: AddressofFn_)) |
185 | continue; |
186 | |
187 | markPtrSymbolsReleased(Call, State, MR: ArgRegion, C); |
188 | } |
189 | } |
190 | } |
191 | |
192 | // [string.require] |
193 | // |
194 | // "References, pointers, and iterators referring to the elements of a |
195 | // basic_string sequence may be invalidated by the following uses of that |
196 | // basic_string object: |
197 | // |
198 | // -- As an argument to any standard library function taking a reference |
199 | // to non-const basic_string as an argument. For example, as an argument to |
200 | // non-member functions swap(), operator>>(), and getline(), or as an argument |
201 | // to basic_string::swap(). |
202 | // |
203 | // -- Calling non-const member functions, except operator[], at, front, back, |
204 | // begin, rbegin, end, and rend." |
205 | |
206 | void InnerPointerChecker::checkPostCall(const CallEvent &Call, |
207 | CheckerContext &C) const { |
208 | ProgramStateRef State = C.getState(); |
209 | |
210 | // TODO: Do we need these to be typed? |
211 | const TypedValueRegion *ObjRegion = nullptr; |
212 | |
213 | if (const auto *ICall = dyn_cast<CXXInstanceCall>(Val: &Call)) { |
214 | ObjRegion = dyn_cast_or_null<TypedValueRegion>( |
215 | Val: ICall->getCXXThisVal().getAsRegion()); |
216 | |
217 | // Check [string.require] / second point. |
218 | if (isInvalidatingMemberFunction(Call)) { |
219 | markPtrSymbolsReleased(Call, State, MR: ObjRegion, C); |
220 | return; |
221 | } |
222 | } |
223 | |
224 | if (isInnerPointerAccessFunction(Call)) { |
225 | |
226 | if (isa<SimpleFunctionCall>(Val: Call)) { |
227 | // NOTE: As of now, we only have one free access function: std::data. |
228 | // If we add more functions like this in the list, hardcoded |
229 | // argument index should be changed. |
230 | ObjRegion = |
231 | dyn_cast_or_null<TypedValueRegion>(Val: Call.getArgSVal(Index: 0).getAsRegion()); |
232 | } |
233 | |
234 | if (!ObjRegion) |
235 | return; |
236 | |
237 | SVal RawPtr = Call.getReturnValue(); |
238 | if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { |
239 | // Start tracking this raw pointer by adding it to the set of symbols |
240 | // associated with this container object in the program state map. |
241 | |
242 | PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); |
243 | const PtrSet *SetPtr = State->get<RawPtrMap>(key: ObjRegion); |
244 | PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet(); |
245 | assert(C.wasInlined || !Set.contains(Sym)); |
246 | Set = F.add(Old: Set, V: Sym); |
247 | |
248 | State = State->set<RawPtrMap>(K: ObjRegion, E: Set); |
249 | C.addTransition(State); |
250 | } |
251 | |
252 | return; |
253 | } |
254 | |
255 | // Check [string.require] / first point. |
256 | checkFunctionArguments(Call, State, C); |
257 | } |
258 | |
259 | void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper, |
260 | CheckerContext &C) const { |
261 | ProgramStateRef State = C.getState(); |
262 | PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); |
263 | RawPtrMapTy RPM = State->get<RawPtrMap>(); |
264 | for (const auto &Entry : RPM) { |
265 | if (!SymReaper.isLiveRegion(region: Entry.first)) { |
266 | // Due to incomplete destructor support, some dead regions might |
267 | // remain in the program state map. Clean them up. |
268 | State = State->remove<RawPtrMap>(K: Entry.first); |
269 | } |
270 | if (const PtrSet *OldSet = State->get<RawPtrMap>(key: Entry.first)) { |
271 | PtrSet CleanedUpSet = *OldSet; |
272 | for (const auto Symbol : Entry.second) { |
273 | if (!SymReaper.isLive(sym: Symbol)) |
274 | CleanedUpSet = F.remove(Old: CleanedUpSet, V: Symbol); |
275 | } |
276 | State = CleanedUpSet.isEmpty() |
277 | ? State->remove<RawPtrMap>(K: Entry.first) |
278 | : State->set<RawPtrMap>(K: Entry.first, E: CleanedUpSet); |
279 | } |
280 | } |
281 | C.addTransition(State); |
282 | } |
283 | |
284 | namespace clang { |
285 | namespace ento { |
286 | namespace allocation_state { |
287 | |
288 | std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) { |
289 | return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(args&: Sym); |
290 | } |
291 | |
292 | const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) { |
293 | RawPtrMapTy Map = State->get<RawPtrMap>(); |
294 | for (const auto &Entry : Map) { |
295 | if (Entry.second.contains(V: Sym)) { |
296 | return Entry.first; |
297 | } |
298 | } |
299 | return nullptr; |
300 | } |
301 | |
302 | } // end namespace allocation_state |
303 | } // end namespace ento |
304 | } // end namespace clang |
305 | |
306 | PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode( |
307 | const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { |
308 | if (!isSymbolTracked(State: N->getState(), Sym: PtrToBuf) || |
309 | isSymbolTracked(State: N->getFirstPred()->getState(), Sym: PtrToBuf)) |
310 | return nullptr; |
311 | |
312 | const Stmt *S = N->getStmtForDiagnostics(); |
313 | if (!S) |
314 | return nullptr; |
315 | |
316 | const MemRegion *ObjRegion = |
317 | allocation_state::getContainerObjRegion(State: N->getState(), Sym: PtrToBuf); |
318 | const auto *TypedRegion = cast<TypedValueRegion>(Val: ObjRegion); |
319 | QualType ObjTy = TypedRegion->getValueType(); |
320 | |
321 | SmallString<256> Buf; |
322 | llvm::raw_svector_ostream OS(Buf); |
323 | OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here" ; |
324 | PathDiagnosticLocation Pos(S, BRC.getSourceManager(), |
325 | N->getLocationContext()); |
326 | return std::make_shared<PathDiagnosticEventPiece>(args&: Pos, args: OS.str(), args: true); |
327 | } |
328 | |
329 | void ento::registerInnerPointerChecker(CheckerManager &Mgr) { |
330 | registerInnerPointerCheckerAux(Mgr); |
331 | Mgr.registerChecker<InnerPointerChecker>(); |
332 | } |
333 | |
334 | bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) { |
335 | return true; |
336 | } |
337 | |