1 | //== RegionStore.cpp - Field-sensitive store model --------------*- C++ -*--==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file defines a basic region store model. The model is field-sensitive,
// but it assumes nothing about the shape of the heap, so recursive data
// structures are largely ignored; in effect, we perform a 1-limiting analysis.
// Pointer parameters are assumed not to alias one another. The pointee
// objects of parameters are created lazily.
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "clang/AST/Attr.h" |
18 | #include "clang/AST/CharUnits.h" |
19 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
20 | #include "clang/Analysis/Analyses/LiveVariables.h" |
21 | #include "clang/Analysis/AnalysisDeclContext.h" |
22 | #include "clang/Basic/JsonSupport.h" |
23 | #include "clang/Basic/TargetInfo.h" |
24 | #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" |
25 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
26 | #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" |
27 | #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" |
28 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" |
29 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
30 | #include "llvm/ADT/ImmutableMap.h" |
31 | #include "llvm/ADT/STLExtras.h" |
32 | #include "llvm/Support/raw_ostream.h" |
33 | #include <optional> |
34 | #include <utility> |
35 | |
36 | using namespace clang; |
37 | using namespace ento; |
38 | |
39 | //===----------------------------------------------------------------------===// |
40 | // Representation of binding keys. |
41 | //===----------------------------------------------------------------------===// |
42 | |
43 | namespace { |
44 | class BindingKey { |
45 | public: |
46 | enum Kind { Default = 0x0, Direct = 0x1 }; |
47 | private: |
48 | enum { Symbolic = 0x2 }; |
49 | |
50 | llvm::PointerIntPair<const MemRegion *, 2> P; |
51 | uint64_t Data; |
52 | |
53 | /// Create a key for a binding to region \p r, which has a symbolic offset |
54 | /// from region \p Base. |
55 | explicit BindingKey(const SubRegion *r, const SubRegion *Base, Kind k) |
56 | : P(r, k | Symbolic), Data(reinterpret_cast<uintptr_t>(Base)) { |
57 | assert(r && Base && "Must have known regions." ); |
58 | assert(getConcreteOffsetRegion() == Base && "Failed to store base region" ); |
59 | } |
60 | |
61 | /// Create a key for a binding at \p offset from base region \p r. |
62 | explicit BindingKey(const MemRegion *r, uint64_t offset, Kind k) |
63 | : P(r, k), Data(offset) { |
64 | assert(r && "Must have known regions." ); |
65 | assert(getOffset() == offset && "Failed to store offset" ); |
66 | assert((r == r->getBaseRegion() || |
67 | isa<ObjCIvarRegion, CXXDerivedObjectRegion>(r)) && |
68 | "Not a base" ); |
69 | } |
70 | public: |
71 | |
72 | bool isDirect() const { return P.getInt() & Direct; } |
73 | bool hasSymbolicOffset() const { return P.getInt() & Symbolic; } |
74 | |
75 | const MemRegion *getRegion() const { return P.getPointer(); } |
76 | uint64_t getOffset() const { |
77 | assert(!hasSymbolicOffset()); |
78 | return Data; |
79 | } |
80 | |
81 | const SubRegion *getConcreteOffsetRegion() const { |
82 | assert(hasSymbolicOffset()); |
83 | return reinterpret_cast<const SubRegion *>(static_cast<uintptr_t>(Data)); |
84 | } |
85 | |
86 | const MemRegion *getBaseRegion() const { |
87 | if (hasSymbolicOffset()) |
88 | return getConcreteOffsetRegion()->getBaseRegion(); |
89 | return getRegion()->getBaseRegion(); |
90 | } |
91 | |
92 | void Profile(llvm::FoldingSetNodeID& ID) const { |
93 | ID.AddPointer(Ptr: P.getOpaqueValue()); |
94 | ID.AddInteger(I: Data); |
95 | } |
96 | |
97 | static BindingKey Make(const MemRegion *R, Kind k); |
98 | |
99 | bool operator<(const BindingKey &X) const { |
100 | if (P.getOpaqueValue() < X.P.getOpaqueValue()) |
101 | return true; |
102 | if (P.getOpaqueValue() > X.P.getOpaqueValue()) |
103 | return false; |
104 | return Data < X.Data; |
105 | } |
106 | |
107 | bool operator==(const BindingKey &X) const { |
108 | return P.getOpaqueValue() == X.P.getOpaqueValue() && |
109 | Data == X.Data; |
110 | } |
111 | |
112 | LLVM_DUMP_METHOD void dump() const; |
113 | }; |
114 | } // end anonymous namespace |
115 | |
116 | BindingKey BindingKey::Make(const MemRegion *R, Kind k) { |
117 | const RegionOffset &RO = R->getAsOffset(); |
118 | if (RO.hasSymbolicOffset()) |
119 | return BindingKey(cast<SubRegion>(Val: R), cast<SubRegion>(Val: RO.getRegion()), k); |
120 | |
121 | return BindingKey(RO.getRegion(), RO.getOffset(), k); |
122 | } |
123 | |
124 | namespace llvm { |
125 | static inline raw_ostream &operator<<(raw_ostream &Out, BindingKey K) { |
126 | Out << "\"kind\": \"" << (K.isDirect() ? "Direct" : "Default" ) |
127 | << "\", \"offset\": " ; |
128 | |
129 | if (!K.hasSymbolicOffset()) |
130 | Out << K.getOffset(); |
131 | else |
132 | Out << "null" ; |
133 | |
134 | return Out; |
135 | } |
136 | |
137 | } // namespace llvm |
138 | |
139 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
140 | void BindingKey::dump() const { llvm::errs() << *this; } |
141 | #endif |
142 | |
143 | //===----------------------------------------------------------------------===// |
144 | // Actual Store type. |
145 | //===----------------------------------------------------------------------===// |
146 | |
147 | typedef llvm::ImmutableMap<BindingKey, SVal> ClusterBindings; |
148 | typedef llvm::ImmutableMapRef<BindingKey, SVal> ClusterBindingsRef; |
149 | typedef std::pair<BindingKey, SVal> BindingPair; |
150 | |
151 | typedef llvm::ImmutableMap<const MemRegion *, ClusterBindings> |
152 | RegionBindings; |
153 | |
154 | namespace { |
155 | class RegionBindingsRef : public llvm::ImmutableMapRef<const MemRegion *, |
156 | ClusterBindings> { |
157 | ClusterBindings::Factory *CBFactory; |
158 | |
159 | // This flag indicates whether the current bindings are within the analysis |
160 | // that has started from main(). It affects how we perform loads from |
161 | // global variables that have initializers: if we have observed the |
162 | // program execution from the start and we know that these variables |
163 | // have not been overwritten yet, we can be sure that their initializers |
164 | // are still relevant. This flag never gets changed when the bindings are |
165 | // updated, so it could potentially be moved into RegionStoreManager |
166 | // (as if it's the same bindings but a different loading procedure) |
167 | // however that would have made the manager needlessly stateful. |
168 | bool IsMainAnalysis; |
169 | |
170 | public: |
171 | typedef llvm::ImmutableMapRef<const MemRegion *, ClusterBindings> |
172 | ParentTy; |
173 | |
174 | RegionBindingsRef(ClusterBindings::Factory &CBFactory, |
175 | const RegionBindings::TreeTy *T, |
176 | RegionBindings::TreeTy::Factory *F, |
177 | bool IsMainAnalysis) |
178 | : llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(T, F), |
179 | CBFactory(&CBFactory), IsMainAnalysis(IsMainAnalysis) {} |
180 | |
181 | RegionBindingsRef(const ParentTy &P, |
182 | ClusterBindings::Factory &CBFactory, |
183 | bool IsMainAnalysis) |
184 | : llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(P), |
185 | CBFactory(&CBFactory), IsMainAnalysis(IsMainAnalysis) {} |
186 | |
187 | RegionBindingsRef add(key_type_ref K, data_type_ref D) const { |
188 | return RegionBindingsRef(static_cast<const ParentTy *>(this)->add(K, D), |
189 | *CBFactory, IsMainAnalysis); |
190 | } |
191 | |
192 | RegionBindingsRef remove(key_type_ref K) const { |
193 | return RegionBindingsRef(static_cast<const ParentTy *>(this)->remove(K), |
194 | *CBFactory, IsMainAnalysis); |
195 | } |
196 | |
197 | RegionBindingsRef addBinding(BindingKey K, SVal V) const; |
198 | |
199 | RegionBindingsRef addBinding(const MemRegion *R, |
200 | BindingKey::Kind k, SVal V) const; |
201 | |
202 | const SVal *lookup(BindingKey K) const; |
203 | const SVal *lookup(const MemRegion *R, BindingKey::Kind k) const; |
204 | using llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>::lookup; |
205 | |
206 | RegionBindingsRef removeBinding(BindingKey K); |
207 | |
208 | RegionBindingsRef removeBinding(const MemRegion *R, |
209 | BindingKey::Kind k); |
210 | |
211 | RegionBindingsRef removeBinding(const MemRegion *R) { |
212 | return removeBinding(R, k: BindingKey::Direct). |
213 | removeBinding(R, k: BindingKey::Default); |
214 | } |
215 | |
216 | std::optional<SVal> getDirectBinding(const MemRegion *R) const; |
217 | |
218 | /// getDefaultBinding - Returns an SVal* representing an optional default |
219 | /// binding associated with a region and its subregions. |
220 | std::optional<SVal> getDefaultBinding(const MemRegion *R) const; |
221 | |
222 | /// Return the internal tree as a Store. |
223 | Store asStore() const { |
224 | llvm::PointerIntPair<Store, 1, bool> Ptr = { |
225 | asImmutableMap().getRootWithoutRetain(), IsMainAnalysis}; |
226 | return reinterpret_cast<Store>(Ptr.getOpaqueValue()); |
227 | } |
228 | |
229 | bool isMainAnalysis() const { |
230 | return IsMainAnalysis; |
231 | } |
232 | |
233 | void printJson(raw_ostream &Out, const char *NL = "\n" , |
234 | unsigned int Space = 0, bool IsDot = false) const { |
235 | for (iterator I = begin(), E = end(); I != E; ++I) { |
236 | // TODO: We might need a .printJson for I.getKey() as well. |
237 | Indent(Out, Space, IsDot) |
238 | << "{ \"cluster\": \"" << I.getKey() << "\", \"pointer\": \"" |
239 | << (const void *)I.getKey() << "\", \"items\": [" << NL; |
240 | |
241 | ++Space; |
242 | const ClusterBindings &CB = I.getData(); |
243 | for (ClusterBindings::iterator CI = CB.begin(), CE = CB.end(); CI != CE; |
244 | ++CI) { |
245 | Indent(Out, Space, IsDot) << "{ " << CI.getKey() << ", \"value\": " ; |
246 | CI.getData().printJson(Out, /*AddQuotes=*/true); |
247 | Out << " }" ; |
248 | if (std::next(x: CI) != CE) |
249 | Out << ','; |
250 | Out << NL; |
251 | } |
252 | |
253 | --Space; |
254 | Indent(Out, Space, IsDot) << "]}" ; |
255 | if (std::next(x: I) != E) |
256 | Out << ','; |
257 | Out << NL; |
258 | } |
259 | } |
260 | |
261 | LLVM_DUMP_METHOD void dump() const { printJson(Out&: llvm::errs()); } |
262 | }; |
263 | } // end anonymous namespace |
264 | |
265 | typedef const RegionBindingsRef& RegionBindingsConstRef; |
266 | |
267 | std::optional<SVal> |
268 | RegionBindingsRef::getDirectBinding(const MemRegion *R) const { |
269 | const SVal *V = lookup(R, k: BindingKey::Direct); |
270 | return V ? std::optional<SVal>(*V) : std::nullopt; |
271 | } |
272 | |
273 | std::optional<SVal> |
274 | RegionBindingsRef::getDefaultBinding(const MemRegion *R) const { |
275 | const SVal *V = lookup(R, k: BindingKey::Default); |
276 | return V ? std::optional<SVal>(*V) : std::nullopt; |
277 | } |
278 | |
279 | RegionBindingsRef RegionBindingsRef::addBinding(BindingKey K, SVal V) const { |
280 | const MemRegion *Base = K.getBaseRegion(); |
281 | |
282 | const ClusterBindings *ExistingCluster = lookup(K: Base); |
283 | ClusterBindings Cluster = |
284 | (ExistingCluster ? *ExistingCluster : CBFactory->getEmptyMap()); |
285 | |
286 | ClusterBindings NewCluster = CBFactory->add(Old: Cluster, K, D: V); |
287 | return add(K: Base, D: NewCluster); |
288 | } |
289 | |
290 | |
291 | RegionBindingsRef RegionBindingsRef::addBinding(const MemRegion *R, |
292 | BindingKey::Kind k, |
293 | SVal V) const { |
294 | return addBinding(K: BindingKey::Make(R, k), V); |
295 | } |
296 | |
297 | const SVal *RegionBindingsRef::lookup(BindingKey K) const { |
298 | const ClusterBindings *Cluster = lookup(K: K.getBaseRegion()); |
299 | if (!Cluster) |
300 | return nullptr; |
301 | return Cluster->lookup(K); |
302 | } |
303 | |
304 | const SVal *RegionBindingsRef::lookup(const MemRegion *R, |
305 | BindingKey::Kind k) const { |
306 | return lookup(K: BindingKey::Make(R, k)); |
307 | } |
308 | |
309 | RegionBindingsRef RegionBindingsRef::removeBinding(BindingKey K) { |
310 | const MemRegion *Base = K.getBaseRegion(); |
311 | const ClusterBindings *Cluster = lookup(K: Base); |
312 | if (!Cluster) |
313 | return *this; |
314 | |
315 | ClusterBindings NewCluster = CBFactory->remove(Old: *Cluster, K); |
316 | if (NewCluster.isEmpty()) |
317 | return remove(K: Base); |
318 | return add(K: Base, D: NewCluster); |
319 | } |
320 | |
321 | RegionBindingsRef RegionBindingsRef::removeBinding(const MemRegion *R, |
322 | BindingKey::Kind k){ |
323 | return removeBinding(K: BindingKey::Make(R, k)); |
324 | } |
325 | |
326 | //===----------------------------------------------------------------------===// |
327 | // Main RegionStore logic. |
328 | //===----------------------------------------------------------------------===// |
329 | |
330 | namespace { |
331 | class InvalidateRegionsWorker; |
332 | |
333 | class RegionStoreManager : public StoreManager { |
334 | public: |
335 | RegionBindings::Factory RBFactory; |
336 | mutable ClusterBindings::Factory CBFactory; |
337 | |
338 | typedef std::vector<SVal> SValListTy; |
339 | private: |
340 | typedef llvm::DenseMap<const LazyCompoundValData *, |
341 | SValListTy> LazyBindingsMapTy; |
342 | LazyBindingsMapTy LazyBindingsMap; |
343 | |
344 | /// The largest number of fields a struct can have and still be |
345 | /// considered "small". |
346 | /// |
347 | /// This is currently used to decide whether or not it is worth "forcing" a |
348 | /// LazyCompoundVal on bind. |
349 | /// |
350 | /// This is controlled by 'region-store-small-struct-limit' option. |
351 | /// To disable all small-struct-dependent behavior, set the option to "0". |
352 | unsigned SmallStructLimit; |
353 | |
354 | /// The largest number of element an array can have and still be |
355 | /// considered "small". |
356 | /// |
357 | /// This is currently used to decide whether or not it is worth "forcing" a |
358 | /// LazyCompoundVal on bind. |
359 | /// |
360 | /// This is controlled by 'region-store-small-struct-limit' option. |
361 | /// To disable all small-struct-dependent behavior, set the option to "0". |
362 | unsigned SmallArrayLimit; |
363 | |
364 | /// A helper used to populate the work list with the given set of |
365 | /// regions. |
366 | void populateWorkList(InvalidateRegionsWorker &W, |
367 | ArrayRef<SVal> Values, |
368 | InvalidatedRegions *TopLevelRegions); |
369 | |
370 | public: |
371 | RegionStoreManager(ProgramStateManager &mgr) |
372 | : StoreManager(mgr), RBFactory(mgr.getAllocator()), |
373 | CBFactory(mgr.getAllocator()), SmallStructLimit(0), SmallArrayLimit(0) { |
374 | ExprEngine &Eng = StateMgr.getOwningEngine(); |
375 | AnalyzerOptions &Options = Eng.getAnalysisManager().options; |
376 | SmallStructLimit = Options.RegionStoreSmallStructLimit; |
377 | SmallArrayLimit = Options.RegionStoreSmallArrayLimit; |
378 | } |
379 | |
380 | /// setImplicitDefaultValue - Set the default binding for the provided |
381 | /// MemRegion to the value implicitly defined for compound literals when |
382 | /// the value is not specified. |
383 | RegionBindingsRef setImplicitDefaultValue(RegionBindingsConstRef B, |
384 | const MemRegion *R, QualType T); |
385 | |
386 | /// ArrayToPointer - Emulates the "decay" of an array to a pointer |
387 | /// type. 'Array' represents the lvalue of the array being decayed |
388 | /// to a pointer, and the returned SVal represents the decayed |
389 | /// version of that lvalue (i.e., a pointer to the first element of |
390 | /// the array). This is called by ExprEngine when evaluating |
391 | /// casts from arrays to pointers. |
392 | SVal ArrayToPointer(Loc Array, QualType ElementTy) override; |
393 | |
394 | /// Creates the Store that correctly represents memory contents before |
395 | /// the beginning of the analysis of the given top-level stack frame. |
396 | StoreRef getInitialStore(const LocationContext *InitLoc) override { |
397 | bool IsMainAnalysis = false; |
398 | if (const auto *FD = dyn_cast<FunctionDecl>(Val: InitLoc->getDecl())) |
399 | IsMainAnalysis = FD->isMain() && !Ctx.getLangOpts().CPlusPlus; |
400 | return StoreRef(RegionBindingsRef( |
401 | RegionBindingsRef::ParentTy(RBFactory.getEmptyMap(), RBFactory), |
402 | CBFactory, IsMainAnalysis).asStore(), *this); |
403 | } |
404 | |
405 | //===-------------------------------------------------------------------===// |
406 | // Binding values to regions. |
407 | //===-------------------------------------------------------------------===// |
408 | RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K, |
409 | const Expr *Ex, |
410 | unsigned Count, |
411 | const LocationContext *LCtx, |
412 | RegionBindingsRef B, |
413 | InvalidatedRegions *Invalidated); |
414 | |
415 | StoreRef invalidateRegions(Store store, |
416 | ArrayRef<SVal> Values, |
417 | const Expr *E, unsigned Count, |
418 | const LocationContext *LCtx, |
419 | const CallEvent *Call, |
420 | InvalidatedSymbols &IS, |
421 | RegionAndSymbolInvalidationTraits &ITraits, |
422 | InvalidatedRegions *Invalidated, |
423 | InvalidatedRegions *InvalidatedTopLevel) override; |
424 | |
425 | bool scanReachableSymbols(Store S, const MemRegion *R, |
426 | ScanReachableSymbols &Callbacks) override; |
427 | |
428 | RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B, |
429 | const SubRegion *R); |
430 | std::optional<SVal> |
431 | getConstantValFromConstArrayInitializer(RegionBindingsConstRef B, |
432 | const ElementRegion *R); |
433 | std::optional<SVal> |
434 | getSValFromInitListExpr(const InitListExpr *ILE, |
435 | const SmallVector<uint64_t, 2> &ConcreteOffsets, |
436 | QualType ElemT); |
437 | SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset, |
438 | QualType ElemT); |
439 | |
440 | public: // Part of public interface to class. |
441 | |
442 | StoreRef Bind(Store store, Loc LV, SVal V) override { |
443 | return StoreRef(bind(B: getRegionBindings(store), LV, V).asStore(), *this); |
444 | } |
445 | |
446 | RegionBindingsRef bind(RegionBindingsConstRef B, Loc LV, SVal V); |
447 | |
448 | // BindDefaultInitial is only used to initialize a region with |
449 | // a default value. |
450 | StoreRef BindDefaultInitial(Store store, const MemRegion *R, |
451 | SVal V) override { |
452 | RegionBindingsRef B = getRegionBindings(store); |
453 | // Use other APIs when you have to wipe the region that was initialized |
454 | // earlier. |
455 | assert(!(B.getDefaultBinding(R) || B.getDirectBinding(R)) && |
456 | "Double initialization!" ); |
457 | B = B.addBinding(K: BindingKey::Make(R, k: BindingKey::Default), V); |
458 | return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this); |
459 | } |
460 | |
461 | // BindDefaultZero is used for zeroing constructors that may accidentally |
462 | // overwrite existing bindings. |
463 | StoreRef BindDefaultZero(Store store, const MemRegion *R) override { |
464 | // FIXME: The offsets of empty bases can be tricky because of |
465 | // of the so called "empty base class optimization". |
466 | // If a base class has been optimized out |
467 | // we should not try to create a binding, otherwise we should. |
468 | // Unfortunately, at the moment ASTRecordLayout doesn't expose |
469 | // the actual sizes of the empty bases |
470 | // and trying to infer them from offsets/alignments |
471 | // seems to be error-prone and non-trivial because of the trailing padding. |
472 | // As a temporary mitigation we don't create bindings for empty bases. |
473 | if (const auto *BR = dyn_cast<CXXBaseObjectRegion>(Val: R)) |
474 | if (BR->getDecl()->isEmpty()) |
475 | return StoreRef(store, *this); |
476 | |
477 | RegionBindingsRef B = getRegionBindings(store); |
478 | SVal V = svalBuilder.makeZeroVal(type: Ctx.CharTy); |
479 | B = removeSubRegionBindings(B, R: cast<SubRegion>(Val: R)); |
480 | B = B.addBinding(K: BindingKey::Make(R, k: BindingKey::Default), V); |
481 | return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this); |
482 | } |
483 | |
484 | /// Attempt to extract the fields of \p LCV and bind them to the struct region |
485 | /// \p R. |
486 | /// |
487 | /// This path is used when it seems advantageous to "force" loading the values |
488 | /// within a LazyCompoundVal to bind memberwise to the struct region, rather |
489 | /// than using a Default binding at the base of the entire region. This is a |
490 | /// heuristic attempting to avoid building long chains of LazyCompoundVals. |
491 | /// |
492 | /// \returns The updated store bindings, or \c std::nullopt if binding |
493 | /// non-lazily would be too expensive. |
494 | std::optional<RegionBindingsRef> |
495 | tryBindSmallStruct(RegionBindingsConstRef B, const TypedValueRegion *R, |
496 | const RecordDecl *RD, nonloc::LazyCompoundVal LCV); |
497 | |
498 | /// BindStruct - Bind a compound value to a structure. |
499 | RegionBindingsRef bindStruct(RegionBindingsConstRef B, |
500 | const TypedValueRegion* R, SVal V); |
501 | |
502 | /// BindVector - Bind a compound value to a vector. |
503 | RegionBindingsRef bindVector(RegionBindingsConstRef B, |
504 | const TypedValueRegion* R, SVal V); |
505 | |
506 | std::optional<RegionBindingsRef> |
507 | tryBindSmallArray(RegionBindingsConstRef B, const TypedValueRegion *R, |
508 | const ArrayType *AT, nonloc::LazyCompoundVal LCV); |
509 | |
510 | RegionBindingsRef bindArray(RegionBindingsConstRef B, |
511 | const TypedValueRegion* R, |
512 | SVal V); |
513 | |
514 | /// Clears out all bindings in the given region and assigns a new value |
515 | /// as a Default binding. |
516 | RegionBindingsRef bindAggregate(RegionBindingsConstRef B, |
517 | const TypedRegion *R, |
518 | SVal DefaultVal); |
519 | |
520 | /// Create a new store with the specified binding removed. |
521 | /// \param ST the original store, that is the basis for the new store. |
522 | /// \param L the location whose binding should be removed. |
523 | StoreRef killBinding(Store ST, Loc L) override; |
524 | |
525 | void incrementReferenceCount(Store store) override { |
526 | getRegionBindings(store).manualRetain(); |
527 | } |
528 | |
529 | /// If the StoreManager supports it, decrement the reference count of |
530 | /// the specified Store object. If the reference count hits 0, the memory |
531 | /// associated with the object is recycled. |
532 | void decrementReferenceCount(Store store) override { |
533 | getRegionBindings(store).manualRelease(); |
534 | } |
535 | |
536 | bool includedInBindings(Store store, const MemRegion *region) const override; |
537 | |
538 | /// Return the value bound to specified location in a given state. |
539 | /// |
540 | /// The high level logic for this method is this: |
541 | /// getBinding (L) |
542 | /// if L has binding |
543 | /// return L's binding |
544 | /// else if L is in killset |
545 | /// return unknown |
546 | /// else |
547 | /// if L is on stack or heap |
548 | /// return undefined |
549 | /// else |
550 | /// return symbolic |
551 | SVal getBinding(Store S, Loc L, QualType T) override { |
552 | return getBinding(B: getRegionBindings(store: S), L, T); |
553 | } |
554 | |
555 | std::optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override { |
556 | RegionBindingsRef B = getRegionBindings(store: S); |
557 | // Default bindings are always applied over a base region so look up the |
558 | // base region's default binding, otherwise the lookup will fail when R |
559 | // is at an offset from R->getBaseRegion(). |
560 | return B.getDefaultBinding(R: R->getBaseRegion()); |
561 | } |
562 | |
563 | SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType()); |
564 | |
565 | SVal getBindingForElement(RegionBindingsConstRef B, const ElementRegion *R); |
566 | |
567 | SVal getBindingForField(RegionBindingsConstRef B, const FieldRegion *R); |
568 | |
569 | SVal getBindingForObjCIvar(RegionBindingsConstRef B, const ObjCIvarRegion *R); |
570 | |
571 | SVal getBindingForVar(RegionBindingsConstRef B, const VarRegion *R); |
572 | |
573 | SVal getBindingForLazySymbol(const TypedValueRegion *R); |
574 | |
575 | SVal getBindingForFieldOrElementCommon(RegionBindingsConstRef B, |
576 | const TypedValueRegion *R, |
577 | QualType Ty); |
578 | |
579 | SVal getLazyBinding(const SubRegion *LazyBindingRegion, |
580 | RegionBindingsRef LazyBinding); |
581 | |
582 | /// Get bindings for the values in a struct and return a CompoundVal, used |
583 | /// when doing struct copy: |
584 | /// struct s x, y; |
585 | /// x = y; |
586 | /// y's value is retrieved by this method. |
587 | SVal getBindingForStruct(RegionBindingsConstRef B, const TypedValueRegion *R); |
588 | SVal getBindingForArray(RegionBindingsConstRef B, const TypedValueRegion *R); |
589 | NonLoc createLazyBinding(RegionBindingsConstRef B, const TypedValueRegion *R); |
590 | |
591 | /// Used to lazily generate derived symbols for bindings that are defined |
592 | /// implicitly by default bindings in a super region. |
593 | /// |
594 | /// Note that callers may need to specially handle LazyCompoundVals, which |
595 | /// are returned as is in case the caller needs to treat them differently. |
596 | std::optional<SVal> |
597 | getBindingForDerivedDefaultValue(RegionBindingsConstRef B, |
598 | const MemRegion *superR, |
599 | const TypedValueRegion *R, QualType Ty); |
600 | |
601 | /// Get the state and region whose binding this region \p R corresponds to. |
602 | /// |
603 | /// If there is no lazy binding for \p R, the returned value will have a null |
604 | /// \c second. Note that a null pointer can represents a valid Store. |
605 | std::pair<Store, const SubRegion *> |
606 | findLazyBinding(RegionBindingsConstRef B, const SubRegion *R, |
607 | const SubRegion *originalRegion); |
608 | |
609 | /// Returns the cached set of interesting SVals contained within a lazy |
610 | /// binding. |
611 | /// |
612 | /// The precise value of "interesting" is determined for the purposes of |
613 | /// RegionStore's internal analysis. It must always contain all regions and |
614 | /// symbols, but may omit constants and other kinds of SVal. |
615 | /// |
616 | /// In contrast to compound values, LazyCompoundVals are also added |
617 | /// to the 'interesting values' list in addition to the child interesting |
618 | /// values. |
619 | const SValListTy &getInterestingValues(nonloc::LazyCompoundVal LCV); |
620 | |
621 | //===------------------------------------------------------------------===// |
622 | // State pruning. |
623 | //===------------------------------------------------------------------===// |
624 | |
625 | /// removeDeadBindings - Scans the RegionStore of 'state' for dead values. |
626 | /// It returns a new Store with these values removed. |
627 | StoreRef removeDeadBindings(Store store, const StackFrameContext *LCtx, |
628 | SymbolReaper& SymReaper) override; |
629 | |
630 | //===------------------------------------------------------------------===// |
631 | // Utility methods. |
632 | //===------------------------------------------------------------------===// |
633 | |
634 | RegionBindingsRef getRegionBindings(Store store) const { |
635 | llvm::PointerIntPair<Store, 1, bool> Ptr; |
636 | Ptr.setFromOpaqueValue(const_cast<void *>(store)); |
637 | return RegionBindingsRef( |
638 | CBFactory, |
639 | static_cast<const RegionBindings::TreeTy *>(Ptr.getPointer()), |
640 | RBFactory.getTreeFactory(), |
641 | Ptr.getInt()); |
642 | } |
643 | |
644 | void printJson(raw_ostream &Out, Store S, const char *NL = "\n" , |
645 | unsigned int Space = 0, bool IsDot = false) const override; |
646 | |
647 | void iterBindings(Store store, BindingsHandler& f) override { |
648 | RegionBindingsRef B = getRegionBindings(store); |
649 | for (const auto &[Region, Cluster] : B) { |
650 | for (const auto &[Key, Value] : Cluster) { |
651 | if (!Key.isDirect()) |
652 | continue; |
653 | if (const SubRegion *R = dyn_cast<SubRegion>(Val: Key.getRegion())) { |
654 | // FIXME: Possibly incorporate the offset? |
655 | if (!f.HandleBinding(SMgr&: *this, store, region: R, val: Value)) |
656 | return; |
657 | } |
658 | } |
659 | } |
660 | } |
661 | }; |
662 | |
663 | } // end anonymous namespace |
664 | |
665 | //===----------------------------------------------------------------------===// |
666 | // RegionStore creation. |
667 | //===----------------------------------------------------------------------===// |
668 | |
669 | std::unique_ptr<StoreManager> |
670 | ento::CreateRegionStoreManager(ProgramStateManager &StMgr) { |
671 | return std::make_unique<RegionStoreManager>(args&: StMgr); |
672 | } |
673 | |
674 | //===----------------------------------------------------------------------===// |
675 | // Region Cluster analysis. |
676 | //===----------------------------------------------------------------------===// |
677 | |
678 | namespace { |
/// Selects which global regions a ClusterAnalysis puts on its initial
/// worklist automatically.
enum GlobalsFilterKind {
  GFK_None,       ///< No global regions are included.
  GFK_SystemOnly, ///< Only system globals are included.
  GFK_All         ///< Every global region is included.
};
689 | |
690 | template <typename DERIVED> |
691 | class ClusterAnalysis { |
692 | protected: |
693 | typedef llvm::DenseMap<const MemRegion *, const ClusterBindings *> ClusterMap; |
694 | typedef const MemRegion * WorkListElement; |
695 | typedef SmallVector<WorkListElement, 10> WorkList; |
696 | |
697 | llvm::SmallPtrSet<const ClusterBindings *, 16> Visited; |
698 | |
699 | WorkList WL; |
700 | |
701 | RegionStoreManager &RM; |
702 | ASTContext &Ctx; |
703 | SValBuilder &svalBuilder; |
704 | |
705 | RegionBindingsRef B; |
706 | |
707 | |
708 | protected: |
709 | const ClusterBindings *getCluster(const MemRegion *R) { |
710 | return B.lookup(K: R); |
711 | } |
712 | |
713 | /// Returns true if all clusters in the given memspace should be initially |
714 | /// included in the cluster analysis. Subclasses may provide their |
715 | /// own implementation. |
716 | bool includeEntireMemorySpace(const MemRegion *Base) { |
717 | return false; |
718 | } |
719 | |
720 | public: |
721 | ClusterAnalysis(RegionStoreManager &rm, ProgramStateManager &StateMgr, |
722 | RegionBindingsRef b) |
723 | : RM(rm), Ctx(StateMgr.getContext()), |
724 | svalBuilder(StateMgr.getSValBuilder()), B(std::move(b)) {} |
725 | |
726 | RegionBindingsRef getRegionBindings() const { return B; } |
727 | |
728 | bool isVisited(const MemRegion *R) { |
729 | return Visited.count(Ptr: getCluster(R)); |
730 | } |
731 | |
732 | void GenerateClusters() { |
733 | // Scan the entire set of bindings and record the region clusters. |
734 | for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end(); |
735 | RI != RE; ++RI){ |
736 | const MemRegion *Base = RI.getKey(); |
737 | |
738 | const ClusterBindings &Cluster = RI.getData(); |
739 | assert(!Cluster.isEmpty() && "Empty clusters should be removed" ); |
740 | static_cast<DERIVED*>(this)->VisitAddedToCluster(Base, Cluster); |
741 | |
742 | // If the base's memspace should be entirely invalidated, add the cluster |
743 | // to the workspace up front. |
744 | if (static_cast<DERIVED*>(this)->includeEntireMemorySpace(Base)) |
745 | AddToWorkList(WorkListElement(Base), &Cluster); |
746 | } |
747 | } |
748 | |
749 | bool AddToWorkList(WorkListElement E, const ClusterBindings *C) { |
750 | if (C && !Visited.insert(Ptr: C).second) |
751 | return false; |
752 | WL.push_back(Elt: E); |
753 | return true; |
754 | } |
755 | |
756 | bool AddToWorkList(const MemRegion *R) { |
757 | return static_cast<DERIVED*>(this)->AddToWorkList(R); |
758 | } |
759 | |
760 | void RunWorkList() { |
761 | while (!WL.empty()) { |
762 | WorkListElement E = WL.pop_back_val(); |
763 | const MemRegion *BaseR = E; |
764 | |
765 | static_cast<DERIVED*>(this)->VisitCluster(BaseR, getCluster(R: BaseR)); |
766 | } |
767 | } |
768 | |
769 | void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C) {} |
770 | void VisitCluster(const MemRegion *baseR, const ClusterBindings *C) {} |
771 | |
772 | void VisitCluster(const MemRegion *BaseR, const ClusterBindings *C, |
773 | bool Flag) { |
774 | static_cast<DERIVED*>(this)->VisitCluster(BaseR, C); |
775 | } |
776 | }; |
777 | } |
778 | |
779 | //===----------------------------------------------------------------------===// |
780 | // Binding invalidation. |
781 | //===----------------------------------------------------------------------===// |
782 | |
783 | bool RegionStoreManager::scanReachableSymbols(Store S, const MemRegion *R, |
784 | ScanReachableSymbols &Callbacks) { |
785 | assert(R == R->getBaseRegion() && "Should only be called for base regions" ); |
786 | RegionBindingsRef B = getRegionBindings(store: S); |
787 | const ClusterBindings *Cluster = B.lookup(K: R); |
788 | |
789 | if (!Cluster) |
790 | return true; |
791 | |
792 | for (ClusterBindings::iterator RI = Cluster->begin(), RE = Cluster->end(); |
793 | RI != RE; ++RI) { |
794 | if (!Callbacks.scan(val: RI.getData())) |
795 | return false; |
796 | } |
797 | |
798 | return true; |
799 | } |
800 | |
801 | static inline bool isUnionField(const FieldRegion *FR) { |
802 | return FR->getDecl()->getParent()->isUnion(); |
803 | } |
804 | |
805 | typedef SmallVector<const FieldDecl *, 8> FieldVector; |
806 | |
807 | static void getSymbolicOffsetFields(BindingKey K, FieldVector &Fields) { |
808 | assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys" ); |
809 | |
810 | const MemRegion *Base = K.getConcreteOffsetRegion(); |
811 | const MemRegion *R = K.getRegion(); |
812 | |
813 | while (R != Base) { |
814 | if (const FieldRegion *FR = dyn_cast<FieldRegion>(Val: R)) |
815 | if (!isUnionField(FR)) |
816 | Fields.push_back(Elt: FR->getDecl()); |
817 | |
818 | R = cast<SubRegion>(Val: R)->getSuperRegion(); |
819 | } |
820 | } |
821 | |
822 | static bool isCompatibleWithFields(BindingKey K, const FieldVector &Fields) { |
823 | assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys" ); |
824 | |
825 | if (Fields.empty()) |
826 | return true; |
827 | |
828 | FieldVector FieldsInBindingKey; |
829 | getSymbolicOffsetFields(K, Fields&: FieldsInBindingKey); |
830 | |
831 | ptrdiff_t Delta = FieldsInBindingKey.size() - Fields.size(); |
832 | if (Delta >= 0) |
833 | return std::equal(first1: FieldsInBindingKey.begin() + Delta, |
834 | last1: FieldsInBindingKey.end(), |
835 | first2: Fields.begin()); |
836 | else |
837 | return std::equal(first1: FieldsInBindingKey.begin(), last1: FieldsInBindingKey.end(), |
838 | first2: Fields.begin() - Delta); |
839 | } |
840 | |
841 | /// Collects all bindings in \p Cluster that may refer to bindings within |
842 | /// \p Top. |
843 | /// |
844 | /// Each binding is a pair whose \c first is the key (a BindingKey) and whose |
845 | /// \c second is the value (an SVal). |
846 | /// |
847 | /// The \p IncludeAllDefaultBindings parameter specifies whether to include |
848 | /// default bindings that may extend beyond \p Top itself, e.g. if \p Top is |
849 | /// an aggregate within a larger aggregate with a default binding. |
850 | static void |
851 | collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings, |
852 | SValBuilder &SVB, const ClusterBindings &Cluster, |
853 | const SubRegion *Top, BindingKey TopKey, |
854 | bool IncludeAllDefaultBindings) { |
855 | FieldVector FieldsInSymbolicSubregions; |
856 | if (TopKey.hasSymbolicOffset()) { |
857 | getSymbolicOffsetFields(K: TopKey, Fields&: FieldsInSymbolicSubregions); |
858 | Top = TopKey.getConcreteOffsetRegion(); |
859 | TopKey = BindingKey::Make(R: Top, k: BindingKey::Default); |
860 | } |
861 | |
862 | // Find the length (in bits) of the region being invalidated. |
863 | uint64_t Length = UINT64_MAX; |
864 | SVal Extent = Top->getMemRegionManager().getStaticSize(MR: Top, SVB); |
865 | if (std::optional<nonloc::ConcreteInt> ExtentCI = |
866 | Extent.getAs<nonloc::ConcreteInt>()) { |
867 | const llvm::APSInt &ExtentInt = ExtentCI->getValue(); |
868 | assert(ExtentInt.isNonNegative() || ExtentInt.isUnsigned()); |
869 | // Extents are in bytes but region offsets are in bits. Be careful! |
870 | Length = ExtentInt.getLimitedValue() * SVB.getContext().getCharWidth(); |
871 | } else if (const FieldRegion *FR = dyn_cast<FieldRegion>(Val: Top)) { |
872 | if (FR->getDecl()->isBitField()) |
873 | Length = FR->getDecl()->getBitWidthValue(Ctx: SVB.getContext()); |
874 | } |
875 | |
876 | for (const auto &StoreEntry : Cluster) { |
877 | BindingKey NextKey = StoreEntry.first; |
878 | if (NextKey.getRegion() == TopKey.getRegion()) { |
879 | // FIXME: This doesn't catch the case where we're really invalidating a |
880 | // region with a symbolic offset. Example: |
881 | // R: points[i].y |
882 | // Next: points[0].x |
883 | |
884 | if (NextKey.getOffset() > TopKey.getOffset() && |
885 | NextKey.getOffset() - TopKey.getOffset() < Length) { |
886 | // Case 1: The next binding is inside the region we're invalidating. |
887 | // Include it. |
888 | Bindings.push_back(Elt: StoreEntry); |
889 | |
890 | } else if (NextKey.getOffset() == TopKey.getOffset()) { |
891 | // Case 2: The next binding is at the same offset as the region we're |
892 | // invalidating. In this case, we need to leave default bindings alone, |
893 | // since they may be providing a default value for a regions beyond what |
894 | // we're invalidating. |
895 | // FIXME: This is probably incorrect; consider invalidating an outer |
896 | // struct whose first field is bound to a LazyCompoundVal. |
897 | if (IncludeAllDefaultBindings || NextKey.isDirect()) |
898 | Bindings.push_back(Elt: StoreEntry); |
899 | } |
900 | |
901 | } else if (NextKey.hasSymbolicOffset()) { |
902 | const MemRegion *Base = NextKey.getConcreteOffsetRegion(); |
903 | if (Top->isSubRegionOf(R: Base) && Top != Base) { |
904 | // Case 3: The next key is symbolic and we just changed something within |
905 | // its concrete region. We don't know if the binding is still valid, so |
906 | // we'll be conservative and include it. |
907 | if (IncludeAllDefaultBindings || NextKey.isDirect()) |
908 | if (isCompatibleWithFields(K: NextKey, Fields: FieldsInSymbolicSubregions)) |
909 | Bindings.push_back(Elt: StoreEntry); |
910 | } else if (const SubRegion *BaseSR = dyn_cast<SubRegion>(Val: Base)) { |
911 | // Case 4: The next key is symbolic, but we changed a known |
912 | // super-region. In this case the binding is certainly included. |
913 | if (BaseSR->isSubRegionOf(R: Top)) |
914 | if (isCompatibleWithFields(K: NextKey, Fields: FieldsInSymbolicSubregions)) |
915 | Bindings.push_back(Elt: StoreEntry); |
916 | } |
917 | } |
918 | } |
919 | } |
920 | |
921 | static void |
922 | collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings, |
923 | SValBuilder &SVB, const ClusterBindings &Cluster, |
924 | const SubRegion *Top, bool IncludeAllDefaultBindings) { |
925 | collectSubRegionBindings(Bindings, SVB, Cluster, Top, |
926 | TopKey: BindingKey::Make(R: Top, k: BindingKey::Default), |
927 | IncludeAllDefaultBindings); |
928 | } |
929 | |
930 | RegionBindingsRef |
931 | RegionStoreManager::removeSubRegionBindings(RegionBindingsConstRef B, |
932 | const SubRegion *Top) { |
933 | BindingKey TopKey = BindingKey::Make(R: Top, k: BindingKey::Default); |
934 | const MemRegion *ClusterHead = TopKey.getBaseRegion(); |
935 | |
936 | if (Top == ClusterHead) { |
937 | // We can remove an entire cluster's bindings all in one go. |
938 | return B.remove(K: Top); |
939 | } |
940 | |
941 | const ClusterBindings *Cluster = B.lookup(K: ClusterHead); |
942 | if (!Cluster) { |
943 | // If we're invalidating a region with a symbolic offset, we need to make |
944 | // sure we don't treat the base region as uninitialized anymore. |
945 | if (TopKey.hasSymbolicOffset()) { |
946 | const SubRegion *Concrete = TopKey.getConcreteOffsetRegion(); |
947 | return B.addBinding(R: Concrete, k: BindingKey::Default, V: UnknownVal()); |
948 | } |
949 | return B; |
950 | } |
951 | |
952 | SmallVector<BindingPair, 32> Bindings; |
953 | collectSubRegionBindings(Bindings, SVB&: svalBuilder, Cluster: *Cluster, Top, TopKey, |
954 | /*IncludeAllDefaultBindings=*/false); |
955 | |
956 | ClusterBindingsRef Result(*Cluster, CBFactory); |
957 | for (BindingKey Key : llvm::make_first_range(c&: Bindings)) |
958 | Result = Result.remove(K: Key); |
959 | |
960 | // If we're invalidating a region with a symbolic offset, we need to make sure |
961 | // we don't treat the base region as uninitialized anymore. |
962 | // FIXME: This isn't very precise; see the example in |
963 | // collectSubRegionBindings. |
964 | if (TopKey.hasSymbolicOffset()) { |
965 | const SubRegion *Concrete = TopKey.getConcreteOffsetRegion(); |
966 | Result = Result.add(K: BindingKey::Make(R: Concrete, k: BindingKey::Default), |
967 | D: UnknownVal()); |
968 | } |
969 | |
970 | if (Result.isEmpty()) |
971 | return B.remove(K: ClusterHead); |
972 | return B.add(K: ClusterHead, D: Result.asImmutableMap()); |
973 | } |
974 | |
975 | namespace { |
976 | class InvalidateRegionsWorker : public ClusterAnalysis<InvalidateRegionsWorker> |
977 | { |
978 | const Expr *Ex; |
979 | unsigned Count; |
980 | const LocationContext *LCtx; |
981 | InvalidatedSymbols &IS; |
982 | RegionAndSymbolInvalidationTraits &ITraits; |
983 | StoreManager::InvalidatedRegions *Regions; |
984 | GlobalsFilterKind GlobalsFilter; |
985 | public: |
986 | InvalidateRegionsWorker(RegionStoreManager &rm, |
987 | ProgramStateManager &stateMgr, |
988 | RegionBindingsRef b, |
989 | const Expr *ex, unsigned count, |
990 | const LocationContext *lctx, |
991 | InvalidatedSymbols &is, |
992 | RegionAndSymbolInvalidationTraits &ITraitsIn, |
993 | StoreManager::InvalidatedRegions *r, |
994 | GlobalsFilterKind GFK) |
995 | : ClusterAnalysis<InvalidateRegionsWorker>(rm, stateMgr, b), |
996 | Ex(ex), Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r), |
997 | GlobalsFilter(GFK) {} |
998 | |
999 | void VisitCluster(const MemRegion *baseR, const ClusterBindings *C); |
1000 | void VisitBinding(SVal V); |
1001 | |
1002 | using ClusterAnalysis::AddToWorkList; |
1003 | |
1004 | bool AddToWorkList(const MemRegion *R); |
1005 | |
1006 | /// Returns true if all clusters in the memory space for \p Base should be |
1007 | /// be invalidated. |
1008 | bool includeEntireMemorySpace(const MemRegion *Base); |
1009 | |
1010 | /// Returns true if the memory space of the given region is one of the global |
1011 | /// regions specially included at the start of invalidation. |
1012 | bool isInitiallyIncludedGlobalRegion(const MemRegion *R); |
1013 | }; |
1014 | } |
1015 | |
1016 | bool InvalidateRegionsWorker::AddToWorkList(const MemRegion *R) { |
1017 | bool doNotInvalidateSuperRegion = ITraits.hasTrait( |
1018 | MR: R, IK: RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); |
1019 | const MemRegion *BaseR = doNotInvalidateSuperRegion ? R : R->getBaseRegion(); |
1020 | return AddToWorkList(E: WorkListElement(BaseR), C: getCluster(R: BaseR)); |
1021 | } |
1022 | |
1023 | void InvalidateRegionsWorker::VisitBinding(SVal V) { |
1024 | // A symbol? Mark it touched by the invalidation. |
1025 | if (SymbolRef Sym = V.getAsSymbol()) |
1026 | IS.insert(V: Sym); |
1027 | |
1028 | if (const MemRegion *R = V.getAsRegion()) { |
1029 | AddToWorkList(R); |
1030 | return; |
1031 | } |
1032 | |
1033 | // Is it a LazyCompoundVal? All references get invalidated as well. |
1034 | if (std::optional<nonloc::LazyCompoundVal> LCS = |
1035 | V.getAs<nonloc::LazyCompoundVal>()) { |
1036 | |
1037 | // `getInterestingValues()` returns SVals contained within LazyCompoundVals, |
1038 | // so there is no need to visit them. |
1039 | for (SVal V : RM.getInterestingValues(LCV: *LCS)) |
1040 | if (!isa<nonloc::LazyCompoundVal>(Val: V)) |
1041 | VisitBinding(V); |
1042 | |
1043 | return; |
1044 | } |
1045 | } |
1046 | |
1047 | void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR, |
1048 | const ClusterBindings *C) { |
1049 | |
1050 | bool PreserveRegionsContents = |
1051 | ITraits.hasTrait(MR: baseR, |
1052 | IK: RegionAndSymbolInvalidationTraits::TK_PreserveContents); |
1053 | |
1054 | if (C) { |
1055 | for (SVal Val : llvm::make_second_range(c: *C)) |
1056 | VisitBinding(V: Val); |
1057 | |
1058 | // Invalidate regions contents. |
1059 | if (!PreserveRegionsContents) |
1060 | B = B.remove(K: baseR); |
1061 | } |
1062 | |
1063 | if (const auto *TO = dyn_cast<TypedValueRegion>(Val: baseR)) { |
1064 | if (const auto *RD = TO->getValueType()->getAsCXXRecordDecl()) { |
1065 | |
1066 | // Lambdas can affect all static local variables without explicitly |
1067 | // capturing those. |
1068 | // We invalidate all static locals referenced inside the lambda body. |
1069 | if (RD->isLambda() && RD->getLambdaCallOperator()->getBody()) { |
1070 | using namespace ast_matchers; |
1071 | |
1072 | const char *DeclBind = "DeclBind" ; |
1073 | StatementMatcher RefToStatic = stmt(hasDescendant(declRefExpr( |
1074 | to(InnerMatcher: varDecl(hasStaticStorageDuration()).bind(ID: DeclBind))))); |
1075 | auto Matches = |
1076 | match(RefToStatic, *RD->getLambdaCallOperator()->getBody(), |
1077 | RD->getASTContext()); |
1078 | |
1079 | for (BoundNodes &Match : Matches) { |
1080 | auto *VD = Match.getNodeAs<VarDecl>(DeclBind); |
1081 | const VarRegion *ToInvalidate = |
1082 | RM.getRegionManager().getVarRegion(VD, LCtx); |
1083 | AddToWorkList(ToInvalidate); |
1084 | } |
1085 | } |
1086 | } |
1087 | } |
1088 | |
1089 | // BlockDataRegion? If so, invalidate captured variables that are passed |
1090 | // by reference. |
1091 | if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(Val: baseR)) { |
1092 | for (auto Var : BR->referenced_vars()) { |
1093 | const VarRegion *VR = Var.getCapturedRegion(); |
1094 | const VarDecl *VD = VR->getDecl(); |
1095 | if (VD->hasAttr<BlocksAttr>() || !VD->hasLocalStorage()) { |
1096 | AddToWorkList(R: VR); |
1097 | } |
1098 | else if (Loc::isLocType(T: VR->getValueType())) { |
1099 | // Map the current bindings to a Store to retrieve the value |
1100 | // of the binding. If that binding itself is a region, we should |
1101 | // invalidate that region. This is because a block may capture |
1102 | // a pointer value, but the thing pointed by that pointer may |
1103 | // get invalidated. |
1104 | SVal V = RM.getBinding(B, L: loc::MemRegionVal(VR)); |
1105 | if (std::optional<Loc> L = V.getAs<Loc>()) { |
1106 | if (const MemRegion *LR = L->getAsRegion()) |
1107 | AddToWorkList(R: LR); |
1108 | } |
1109 | } |
1110 | } |
1111 | return; |
1112 | } |
1113 | |
1114 | // Symbolic region? |
1115 | if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Val: baseR)) |
1116 | IS.insert(V: SR->getSymbol()); |
1117 | |
1118 | // Nothing else should be done in the case when we preserve regions context. |
1119 | if (PreserveRegionsContents) |
1120 | return; |
1121 | |
1122 | // Otherwise, we have a normal data region. Record that we touched the region. |
1123 | if (Regions) |
1124 | Regions->push_back(Elt: baseR); |
1125 | |
1126 | if (isa<AllocaRegion, SymbolicRegion>(Val: baseR)) { |
1127 | // Invalidate the region by setting its default value to |
1128 | // conjured symbol. The type of the symbol is irrelevant. |
1129 | DefinedOrUnknownSVal V = |
1130 | svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, Ctx.IntTy, Count); |
1131 | B = B.addBinding(R: baseR, k: BindingKey::Default, V); |
1132 | return; |
1133 | } |
1134 | |
1135 | if (!baseR->isBoundable()) |
1136 | return; |
1137 | |
1138 | const TypedValueRegion *TR = cast<TypedValueRegion>(Val: baseR); |
1139 | QualType T = TR->getValueType(); |
1140 | |
1141 | if (isInitiallyIncludedGlobalRegion(R: baseR)) { |
1142 | // If the region is a global and we are invalidating all globals, |
1143 | // erasing the entry is good enough. This causes all globals to be lazily |
1144 | // symbolicated from the same base symbol. |
1145 | return; |
1146 | } |
1147 | |
1148 | if (T->isRecordType()) { |
1149 | // Invalidate the region by setting its default value to |
1150 | // conjured symbol. The type of the symbol is irrelevant. |
1151 | DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, |
1152 | Ctx.IntTy, Count); |
1153 | B = B.addBinding(R: baseR, k: BindingKey::Default, V); |
1154 | return; |
1155 | } |
1156 | |
1157 | if (const ArrayType *AT = Ctx.getAsArrayType(T)) { |
1158 | bool doNotInvalidateSuperRegion = ITraits.hasTrait( |
1159 | MR: baseR, |
1160 | IK: RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); |
1161 | |
1162 | if (doNotInvalidateSuperRegion) { |
1163 | // We are not doing blank invalidation of the whole array region so we |
1164 | // have to manually invalidate each elements. |
1165 | std::optional<uint64_t> NumElements; |
1166 | |
1167 | // Compute lower and upper offsets for region within array. |
1168 | if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Val: AT)) |
1169 | NumElements = CAT->getZExtSize(); |
1170 | if (!NumElements) // We are not dealing with a constant size array |
1171 | goto conjure_default; |
1172 | QualType ElementTy = AT->getElementType(); |
1173 | uint64_t ElemSize = Ctx.getTypeSize(T: ElementTy); |
1174 | const RegionOffset &RO = baseR->getAsOffset(); |
1175 | const MemRegion *SuperR = baseR->getBaseRegion(); |
1176 | if (RO.hasSymbolicOffset()) { |
1177 | // If base region has a symbolic offset, |
1178 | // we revert to invalidating the super region. |
1179 | if (SuperR) |
1180 | AddToWorkList(R: SuperR); |
1181 | goto conjure_default; |
1182 | } |
1183 | |
1184 | uint64_t LowerOffset = RO.getOffset(); |
1185 | uint64_t UpperOffset = LowerOffset + *NumElements * ElemSize; |
1186 | bool UpperOverflow = UpperOffset < LowerOffset; |
1187 | |
1188 | // Invalidate regions which are within array boundaries, |
1189 | // or have a symbolic offset. |
1190 | if (!SuperR) |
1191 | goto conjure_default; |
1192 | |
1193 | const ClusterBindings *C = B.lookup(K: SuperR); |
1194 | if (!C) |
1195 | goto conjure_default; |
1196 | |
1197 | for (const auto &[BK, V] : *C) { |
1198 | std::optional<uint64_t> ROffset = |
1199 | BK.hasSymbolicOffset() ? std::optional<uint64_t>() : BK.getOffset(); |
1200 | |
1201 | // Check offset is not symbolic and within array's boundaries. |
1202 | // Handles arrays of 0 elements and of 0-sized elements as well. |
1203 | if (!ROffset || |
1204 | ((*ROffset >= LowerOffset && *ROffset < UpperOffset) || |
1205 | (UpperOverflow && |
1206 | (*ROffset >= LowerOffset || *ROffset < UpperOffset)) || |
1207 | (LowerOffset == UpperOffset && *ROffset == LowerOffset))) { |
1208 | B = B.removeBinding(K: BK); |
1209 | // Bound symbolic regions need to be invalidated for dead symbol |
1210 | // detection. |
1211 | const MemRegion *R = V.getAsRegion(); |
1212 | if (isa_and_nonnull<SymbolicRegion>(Val: R)) |
1213 | VisitBinding(V); |
1214 | } |
1215 | } |
1216 | } |
1217 | conjure_default: |
1218 | // Set the default value of the array to conjured symbol. |
1219 | DefinedOrUnknownSVal V = |
1220 | svalBuilder.conjureSymbolVal(symbolTag: baseR, expr: Ex, LCtx, |
1221 | type: AT->getElementType(), count: Count); |
1222 | B = B.addBinding(R: baseR, k: BindingKey::Default, V); |
1223 | return; |
1224 | } |
1225 | |
1226 | DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(symbolTag: baseR, expr: Ex, LCtx, |
1227 | type: T,count: Count); |
1228 | assert(SymbolManager::canSymbolicate(T) || V.isUnknown()); |
1229 | B = B.addBinding(R: baseR, k: BindingKey::Direct, V); |
1230 | } |
1231 | |
1232 | bool InvalidateRegionsWorker::isInitiallyIncludedGlobalRegion( |
1233 | const MemRegion *R) { |
1234 | switch (GlobalsFilter) { |
1235 | case GFK_None: |
1236 | return false; |
1237 | case GFK_SystemOnly: |
1238 | return isa<GlobalSystemSpaceRegion>(Val: R->getMemorySpace()); |
1239 | case GFK_All: |
1240 | return isa<NonStaticGlobalSpaceRegion>(Val: R->getMemorySpace()); |
1241 | } |
1242 | |
1243 | llvm_unreachable("unknown globals filter" ); |
1244 | } |
1245 | |
1246 | bool InvalidateRegionsWorker::includeEntireMemorySpace(const MemRegion *Base) { |
1247 | if (isInitiallyIncludedGlobalRegion(R: Base)) |
1248 | return true; |
1249 | |
1250 | const MemSpaceRegion *MemSpace = Base->getMemorySpace(); |
1251 | return ITraits.hasTrait(MR: MemSpace, |
1252 | IK: RegionAndSymbolInvalidationTraits::TK_EntireMemSpace); |
1253 | } |
1254 | |
1255 | RegionBindingsRef |
1256 | RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K, |
1257 | const Expr *Ex, |
1258 | unsigned Count, |
1259 | const LocationContext *LCtx, |
1260 | RegionBindingsRef B, |
1261 | InvalidatedRegions *Invalidated) { |
1262 | // Bind the globals memory space to a new symbol that we will use to derive |
1263 | // the bindings for all globals. |
1264 | const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K); |
1265 | SVal V = svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void*) GS, Ex, LCtx, |
1266 | /* type does not matter */ Ctx.IntTy, |
1267 | Count); |
1268 | |
1269 | B = B.removeBinding(R: GS) |
1270 | .addBinding(K: BindingKey::Make(R: GS, k: BindingKey::Default), V); |
1271 | |
1272 | // Even if there are no bindings in the global scope, we still need to |
1273 | // record that we touched it. |
1274 | if (Invalidated) |
1275 | Invalidated->push_back(Elt: GS); |
1276 | |
1277 | return B; |
1278 | } |
1279 | |
1280 | void RegionStoreManager::populateWorkList(InvalidateRegionsWorker &W, |
1281 | ArrayRef<SVal> Values, |
1282 | InvalidatedRegions *TopLevelRegions) { |
1283 | for (SVal V : Values) { |
1284 | if (auto LCS = V.getAs<nonloc::LazyCompoundVal>()) { |
1285 | for (SVal S : getInterestingValues(LCV: *LCS)) |
1286 | if (const MemRegion *R = S.getAsRegion()) |
1287 | W.AddToWorkList(R); |
1288 | |
1289 | continue; |
1290 | } |
1291 | |
1292 | if (const MemRegion *R = V.getAsRegion()) { |
1293 | if (TopLevelRegions) |
1294 | TopLevelRegions->push_back(Elt: R); |
1295 | W.AddToWorkList(R); |
1296 | continue; |
1297 | } |
1298 | } |
1299 | } |
1300 | |
1301 | StoreRef |
1302 | RegionStoreManager::invalidateRegions(Store store, |
1303 | ArrayRef<SVal> Values, |
1304 | const Expr *Ex, unsigned Count, |
1305 | const LocationContext *LCtx, |
1306 | const CallEvent *Call, |
1307 | InvalidatedSymbols &IS, |
1308 | RegionAndSymbolInvalidationTraits &ITraits, |
1309 | InvalidatedRegions *TopLevelRegions, |
1310 | InvalidatedRegions *Invalidated) { |
1311 | GlobalsFilterKind GlobalsFilter; |
1312 | if (Call) { |
1313 | if (Call->isInSystemHeader()) |
1314 | GlobalsFilter = GFK_SystemOnly; |
1315 | else |
1316 | GlobalsFilter = GFK_All; |
1317 | } else { |
1318 | GlobalsFilter = GFK_None; |
1319 | } |
1320 | |
1321 | RegionBindingsRef B = getRegionBindings(store); |
1322 | InvalidateRegionsWorker W(*this, StateMgr, B, Ex, Count, LCtx, IS, ITraits, |
1323 | Invalidated, GlobalsFilter); |
1324 | |
1325 | // Scan the bindings and generate the clusters. |
1326 | W.GenerateClusters(); |
1327 | |
1328 | // Add the regions to the worklist. |
1329 | populateWorkList(W, Values, TopLevelRegions); |
1330 | |
1331 | W.RunWorkList(); |
1332 | |
1333 | // Return the new bindings. |
1334 | B = W.getRegionBindings(); |
1335 | |
1336 | // For calls, determine which global regions should be invalidated and |
1337 | // invalidate them. (Note that function-static and immutable globals are never |
1338 | // invalidated by this.) |
1339 | // TODO: This could possibly be more precise with modules. |
1340 | switch (GlobalsFilter) { |
1341 | case GFK_All: |
1342 | B = invalidateGlobalRegion(K: MemRegion::GlobalInternalSpaceRegionKind, |
1343 | Ex, Count, LCtx, B, Invalidated); |
1344 | [[fallthrough]]; |
1345 | case GFK_SystemOnly: |
1346 | B = invalidateGlobalRegion(K: MemRegion::GlobalSystemSpaceRegionKind, |
1347 | Ex, Count, LCtx, B, Invalidated); |
1348 | [[fallthrough]]; |
1349 | case GFK_None: |
1350 | break; |
1351 | } |
1352 | |
1353 | return StoreRef(B.asStore(), *this); |
1354 | } |
1355 | |
1356 | //===----------------------------------------------------------------------===// |
1357 | // Location and region casting. |
1358 | //===----------------------------------------------------------------------===// |
1359 | |
1360 | /// ArrayToPointer - Emulates the "decay" of an array to a pointer |
1361 | /// type. 'Array' represents the lvalue of the array being decayed |
1362 | /// to a pointer, and the returned SVal represents the decayed |
1363 | /// version of that lvalue (i.e., a pointer to the first element of |
1364 | /// the array). This is called by ExprEngine when evaluating casts |
1365 | /// from arrays to pointers. |
1366 | SVal RegionStoreManager::ArrayToPointer(Loc Array, QualType T) { |
1367 | if (isa<loc::ConcreteInt>(Val: Array)) |
1368 | return Array; |
1369 | |
1370 | if (!isa<loc::MemRegionVal>(Val: Array)) |
1371 | return UnknownVal(); |
1372 | |
1373 | const SubRegion *R = |
1374 | cast<SubRegion>(Val: Array.castAs<loc::MemRegionVal>().getRegion()); |
1375 | NonLoc ZeroIdx = svalBuilder.makeZeroArrayIndex(); |
1376 | return loc::MemRegionVal(MRMgr.getElementRegion(elementType: T, Idx: ZeroIdx, superRegion: R, Ctx)); |
1377 | } |
1378 | |
1379 | //===----------------------------------------------------------------------===// |
1380 | // Loading values from regions. |
1381 | //===----------------------------------------------------------------------===// |
1382 | |
1383 | SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) { |
1384 | assert(!isa<UnknownVal>(L) && "location unknown" ); |
1385 | assert(!isa<UndefinedVal>(L) && "location undefined" ); |
1386 | |
1387 | // For access to concrete addresses, return UnknownVal. Checks |
1388 | // for null dereferences (and similar errors) are done by checkers, not |
1389 | // the Store. |
1390 | // FIXME: We can consider lazily symbolicating such memory, but we really |
1391 | // should defer this when we can reason easily about symbolicating arrays |
1392 | // of bytes. |
1393 | if (L.getAs<loc::ConcreteInt>()) { |
1394 | return UnknownVal(); |
1395 | } |
1396 | if (!L.getAs<loc::MemRegionVal>()) { |
1397 | return UnknownVal(); |
1398 | } |
1399 | |
1400 | const MemRegion *MR = L.castAs<loc::MemRegionVal>().getRegion(); |
1401 | |
1402 | if (isa<BlockDataRegion>(Val: MR)) { |
1403 | return UnknownVal(); |
1404 | } |
1405 | |
1406 | // Auto-detect the binding type. |
1407 | if (T.isNull()) { |
1408 | if (const auto *TVR = dyn_cast<TypedValueRegion>(Val: MR)) |
1409 | T = TVR->getValueType(); |
1410 | else if (const auto *TR = dyn_cast<TypedRegion>(Val: MR)) |
1411 | T = TR->getLocationType()->getPointeeType(); |
1412 | else if (const auto *SR = dyn_cast<SymbolicRegion>(Val: MR)) |
1413 | T = SR->getPointeeStaticType(); |
1414 | } |
1415 | assert(!T.isNull() && "Unable to auto-detect binding type!" ); |
1416 | assert(!T->isVoidType() && "Attempting to dereference a void pointer!" ); |
1417 | |
1418 | if (!isa<TypedValueRegion>(Val: MR)) |
1419 | MR = GetElementZeroRegion(R: cast<SubRegion>(Val: MR), T); |
1420 | |
1421 | // FIXME: Perhaps this method should just take a 'const MemRegion*' argument |
1422 | // instead of 'Loc', and have the other Loc cases handled at a higher level. |
1423 | const TypedValueRegion *R = cast<TypedValueRegion>(Val: MR); |
1424 | QualType RTy = R->getValueType(); |
1425 | |
1426 | // FIXME: we do not yet model the parts of a complex type, so treat the |
1427 | // whole thing as "unknown". |
1428 | if (RTy->isAnyComplexType()) |
1429 | return UnknownVal(); |
1430 | |
1431 | // FIXME: We should eventually handle funny addressing. e.g.: |
1432 | // |
1433 | // int x = ...; |
1434 | // int *p = &x; |
1435 | // char *q = (char*) p; |
1436 | // char c = *q; // returns the first byte of 'x'. |
1437 | // |
1438 | // Such funny addressing will occur due to layering of regions. |
1439 | if (RTy->isStructureOrClassType()) |
1440 | return getBindingForStruct(B, R); |
1441 | |
1442 | // FIXME: Handle unions. |
1443 | if (RTy->isUnionType()) |
1444 | return createLazyBinding(B, R); |
1445 | |
1446 | if (RTy->isArrayType()) { |
1447 | if (RTy->isConstantArrayType()) |
1448 | return getBindingForArray(B, R); |
1449 | else |
1450 | return UnknownVal(); |
1451 | } |
1452 | |
1453 | // FIXME: handle Vector types. |
1454 | if (RTy->isVectorType()) |
1455 | return UnknownVal(); |
1456 | |
1457 | if (const FieldRegion* FR = dyn_cast<FieldRegion>(Val: R)) |
1458 | return svalBuilder.evalCast(V: getBindingForField(B, R: FR), CastTy: T, OriginalTy: QualType{}); |
1459 | |
1460 | if (const ElementRegion* ER = dyn_cast<ElementRegion>(Val: R)) { |
1461 | // FIXME: Here we actually perform an implicit conversion from the loaded |
1462 | // value to the element type. Eventually we want to compose these values |
1463 | // more intelligently. For example, an 'element' can encompass multiple |
1464 | // bound regions (e.g., several bound bytes), or could be a subset of |
1465 | // a larger value. |
1466 | return svalBuilder.evalCast(V: getBindingForElement(B, R: ER), CastTy: T, OriginalTy: QualType{}); |
1467 | } |
1468 | |
1469 | if (const ObjCIvarRegion *IVR = dyn_cast<ObjCIvarRegion>(Val: R)) { |
1470 | // FIXME: Here we actually perform an implicit conversion from the loaded |
1471 | // value to the ivar type. What we should model is stores to ivars |
1472 | // that blow past the extent of the ivar. If the address of the ivar is |
1473 | // reinterpretted, it is possible we stored a different value that could |
1474 | // fit within the ivar. Either we need to cast these when storing them |
1475 | // or reinterpret them lazily (as we do here). |
1476 | return svalBuilder.evalCast(V: getBindingForObjCIvar(B, R: IVR), CastTy: T, OriginalTy: QualType{}); |
1477 | } |
1478 | |
1479 | if (const VarRegion *VR = dyn_cast<VarRegion>(Val: R)) { |
1480 | // FIXME: Here we actually perform an implicit conversion from the loaded |
1481 | // value to the variable type. What we should model is stores to variables |
1482 | // that blow past the extent of the variable. If the address of the |
1483 | // variable is reinterpretted, it is possible we stored a different value |
1484 | // that could fit within the variable. Either we need to cast these when |
1485 | // storing them or reinterpret them lazily (as we do here). |
1486 | return svalBuilder.evalCast(V: getBindingForVar(B, R: VR), CastTy: T, OriginalTy: QualType{}); |
1487 | } |
1488 | |
1489 | const SVal *V = B.lookup(R, k: BindingKey::Direct); |
1490 | |
1491 | // Check if the region has a binding. |
1492 | if (V) |
1493 | return *V; |
1494 | |
1495 | // The location does not have a bound value. This means that it has |
1496 | // the value it had upon its creation and/or entry to the analyzed |
1497 | // function/method. These are either symbolic values or 'undefined'. |
1498 | if (R->hasStackNonParametersStorage()) { |
1499 | // All stack variables are considered to have undefined values |
1500 | // upon creation. All heap allocated blocks are considered to |
1501 | // have undefined values as well unless they are explicitly bound |
1502 | // to specific values. |
1503 | return UndefinedVal(); |
1504 | } |
1505 | |
1506 | // All other values are symbolic. |
1507 | return svalBuilder.getRegionValueSymbolVal(region: R); |
1508 | } |
1509 | |
1510 | static QualType getUnderlyingType(const SubRegion *R) { |
1511 | QualType RegionTy; |
1512 | if (const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(Val: R)) |
1513 | RegionTy = TVR->getValueType(); |
1514 | |
1515 | if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Val: R)) |
1516 | RegionTy = SR->getSymbol()->getType(); |
1517 | |
1518 | return RegionTy; |
1519 | } |
1520 | |
1521 | /// Checks to see if store \p B has a lazy binding for region \p R. |
1522 | /// |
1523 | /// If \p AllowSubregionBindings is \c false, a lazy binding will be rejected |
1524 | /// if there are additional bindings within \p R. |
1525 | /// |
1526 | /// Note that unlike RegionStoreManager::findLazyBinding, this will not search |
1527 | /// for lazy bindings for super-regions of \p R. |
1528 | static std::optional<nonloc::LazyCompoundVal> |
1529 | getExistingLazyBinding(SValBuilder &SVB, RegionBindingsConstRef B, |
1530 | const SubRegion *R, bool AllowSubregionBindings) { |
1531 | std::optional<SVal> V = B.getDefaultBinding(R); |
1532 | if (!V) |
1533 | return std::nullopt; |
1534 | |
1535 | std::optional<nonloc::LazyCompoundVal> LCV = |
1536 | V->getAs<nonloc::LazyCompoundVal>(); |
1537 | if (!LCV) |
1538 | return std::nullopt; |
1539 | |
1540 | // If the LCV is for a subregion, the types might not match, and we shouldn't |
1541 | // reuse the binding. |
1542 | QualType RegionTy = getUnderlyingType(R); |
1543 | if (!RegionTy.isNull() && |
1544 | !RegionTy->isVoidPointerType()) { |
1545 | QualType SourceRegionTy = LCV->getRegion()->getValueType(); |
1546 | if (!SVB.getContext().hasSameUnqualifiedType(T1: RegionTy, T2: SourceRegionTy)) |
1547 | return std::nullopt; |
1548 | } |
1549 | |
1550 | if (!AllowSubregionBindings) { |
1551 | // If there are any other bindings within this region, we shouldn't reuse |
1552 | // the top-level binding. |
1553 | SmallVector<BindingPair, 16> Bindings; |
1554 | collectSubRegionBindings(Bindings, SVB, Cluster: *B.lookup(K: R->getBaseRegion()), Top: R, |
1555 | /*IncludeAllDefaultBindings=*/true); |
1556 | if (Bindings.size() > 1) |
1557 | return std::nullopt; |
1558 | } |
1559 | |
1560 | return *LCV; |
1561 | } |
1562 | |
1563 | std::pair<Store, const SubRegion *> |
1564 | RegionStoreManager::findLazyBinding(RegionBindingsConstRef B, |
1565 | const SubRegion *R, |
1566 | const SubRegion *originalRegion) { |
1567 | if (originalRegion != R) { |
1568 | if (std::optional<nonloc::LazyCompoundVal> V = |
1569 | getExistingLazyBinding(SVB&: svalBuilder, B, R, AllowSubregionBindings: true)) |
1570 | return std::make_pair(x: V->getStore(), y: V->getRegion()); |
1571 | } |
1572 | |
1573 | typedef std::pair<Store, const SubRegion *> StoreRegionPair; |
1574 | StoreRegionPair Result = StoreRegionPair(); |
1575 | |
1576 | if (const ElementRegion *ER = dyn_cast<ElementRegion>(Val: R)) { |
1577 | Result = findLazyBinding(B, R: cast<SubRegion>(ER->getSuperRegion()), |
1578 | originalRegion); |
1579 | |
1580 | if (Result.second) |
1581 | Result.second = MRMgr.getElementRegionWithSuper(ER, superRegion: Result.second); |
1582 | |
1583 | } else if (const FieldRegion *FR = dyn_cast<FieldRegion>(Val: R)) { |
1584 | Result = findLazyBinding(B, R: cast<SubRegion>(Val: FR->getSuperRegion()), |
1585 | originalRegion); |
1586 | |
1587 | if (Result.second) |
1588 | Result.second = MRMgr.getFieldRegionWithSuper(FR, superRegion: Result.second); |
1589 | |
1590 | } else if (const CXXBaseObjectRegion *BaseReg = |
1591 | dyn_cast<CXXBaseObjectRegion>(Val: R)) { |
1592 | // C++ base object region is another kind of region that we should blast |
1593 | // through to look for lazy compound value. It is like a field region. |
1594 | Result = findLazyBinding(B, R: cast<SubRegion>(Val: BaseReg->getSuperRegion()), |
1595 | originalRegion); |
1596 | |
1597 | if (Result.second) |
1598 | Result.second = MRMgr.getCXXBaseObjectRegionWithSuper(baseReg: BaseReg, |
1599 | superRegion: Result.second); |
1600 | } |
1601 | |
1602 | return Result; |
1603 | } |
1604 | |
1605 | /// This is a helper function for `getConstantValFromConstArrayInitializer`. |
1606 | /// |
1607 | /// Return an array of extents of the declared array type. |
1608 | /// |
1609 | /// E.g. for `int x[1][2][3];` returns { 1, 2, 3 }. |
1610 | static SmallVector<uint64_t, 2> |
1611 | getConstantArrayExtents(const ConstantArrayType *CAT) { |
1612 | assert(CAT && "ConstantArrayType should not be null" ); |
1613 | CAT = cast<ConstantArrayType>(CAT->getCanonicalTypeInternal()); |
1614 | SmallVector<uint64_t, 2> Extents; |
1615 | do { |
1616 | Extents.push_back(Elt: CAT->getZExtSize()); |
1617 | } while ((CAT = dyn_cast<ConstantArrayType>(CAT->getElementType()))); |
1618 | return Extents; |
1619 | } |
1620 | |
1621 | /// This is a helper function for `getConstantValFromConstArrayInitializer`. |
1622 | /// |
1623 | /// Return an array of offsets from nested ElementRegions and a root base |
1624 | /// region. The array is never empty and a base region is never null. |
1625 | /// |
1626 | /// E.g. for `Element{Element{Element{VarRegion},1},2},3}` returns { 3, 2, 1 }. |
1627 | /// This represents an access through indirection: `arr[1][2][3];` |
1628 | /// |
1629 | /// \param ER The given (possibly nested) ElementRegion. |
1630 | /// |
1631 | /// \note The result array is in the reverse order of indirection expression: |
1632 | /// arr[1][2][3] -> { 3, 2, 1 }. This helps to provide complexity O(n), where n |
1633 | /// is a number of indirections. It may not affect performance in real-life |
1634 | /// code, though. |
1635 | static std::pair<SmallVector<SVal, 2>, const MemRegion *> |
1636 | getElementRegionOffsetsWithBase(const ElementRegion *ER) { |
1637 | assert(ER && "ConstantArrayType should not be null" ); |
1638 | const MemRegion *Base; |
1639 | SmallVector<SVal, 2> SValOffsets; |
1640 | do { |
1641 | SValOffsets.push_back(Elt: ER->getIndex()); |
1642 | Base = ER->getSuperRegion(); |
1643 | ER = dyn_cast<ElementRegion>(Val: Base); |
1644 | } while (ER); |
1645 | return {SValOffsets, Base}; |
1646 | } |
1647 | |
1648 | /// This is a helper function for `getConstantValFromConstArrayInitializer`. |
1649 | /// |
1650 | /// Convert array of offsets from `SVal` to `uint64_t` in consideration of |
1651 | /// respective array extents. |
1652 | /// \param SrcOffsets [in] The array of offsets of type `SVal` in reversed |
1653 | /// order (expectedly received from `getElementRegionOffsetsWithBase`). |
1654 | /// \param ArrayExtents [in] The array of extents. |
1655 | /// \param DstOffsets [out] The array of offsets of type `uint64_t`. |
1656 | /// \returns: |
1657 | /// - `std::nullopt` for successful convertion. |
1658 | /// - `UndefinedVal` or `UnknownVal` otherwise. It's expected that this SVal |
1659 | /// will be returned as a suitable value of the access operation. |
1660 | /// which should be returned as a correct |
1661 | /// |
1662 | /// \example: |
1663 | /// const int arr[10][20][30] = {}; // ArrayExtents { 10, 20, 30 } |
1664 | /// int x1 = arr[4][5][6]; // SrcOffsets { NonLoc(6), NonLoc(5), NonLoc(4) } |
1665 | /// // DstOffsets { 4, 5, 6 } |
1666 | /// // returns std::nullopt |
1667 | /// int x2 = arr[42][5][-6]; // returns UndefinedVal |
1668 | /// int x3 = arr[4][5][x2]; // returns UnknownVal |
1669 | static std::optional<SVal> |
1670 | convertOffsetsFromSvalToUnsigneds(const SmallVector<SVal, 2> &SrcOffsets, |
1671 | const SmallVector<uint64_t, 2> ArrayExtents, |
1672 | SmallVector<uint64_t, 2> &DstOffsets) { |
1673 | // Check offsets for being out of bounds. |
1674 | // C++20 [expr.add] 7.6.6.4 (excerpt): |
1675 | // If P points to an array element i of an array object x with n |
1676 | // elements, where i < 0 or i > n, the behavior is undefined. |
1677 | // Dereferencing is not allowed on the "one past the last |
1678 | // element", when i == n. |
1679 | // Example: |
1680 | // const int arr[3][2] = {{1, 2}, {3, 4}}; |
1681 | // arr[0][0]; // 1 |
1682 | // arr[0][1]; // 2 |
1683 | // arr[0][2]; // UB |
1684 | // arr[1][0]; // 3 |
1685 | // arr[1][1]; // 4 |
1686 | // arr[1][-1]; // UB |
1687 | // arr[2][0]; // 0 |
1688 | // arr[2][1]; // 0 |
1689 | // arr[-2][0]; // UB |
1690 | DstOffsets.resize(N: SrcOffsets.size()); |
1691 | auto ExtentIt = ArrayExtents.begin(); |
1692 | auto OffsetIt = DstOffsets.begin(); |
1693 | // Reverse `SValOffsets` to make it consistent with `ArrayExtents`. |
1694 | for (SVal V : llvm::reverse(C: SrcOffsets)) { |
1695 | if (auto CI = V.getAs<nonloc::ConcreteInt>()) { |
1696 | // When offset is out of array's bounds, result is UB. |
1697 | const llvm::APSInt &Offset = CI->getValue(); |
1698 | if (Offset.isNegative() || Offset.uge(RHS: *(ExtentIt++))) |
1699 | return UndefinedVal(); |
1700 | // Store index in a reversive order. |
1701 | *(OffsetIt++) = Offset.getZExtValue(); |
1702 | continue; |
1703 | } |
1704 | // Symbolic index presented. Return Unknown value. |
1705 | // FIXME: We also need to take ElementRegions with symbolic indexes into |
1706 | // account. |
1707 | return UnknownVal(); |
1708 | } |
1709 | return std::nullopt; |
1710 | } |
1711 | |
1712 | std::optional<SVal> RegionStoreManager::getConstantValFromConstArrayInitializer( |
1713 | RegionBindingsConstRef B, const ElementRegion *R) { |
1714 | assert(R && "ElementRegion should not be null" ); |
1715 | |
1716 | // Treat an n-dimensional array. |
1717 | SmallVector<SVal, 2> SValOffsets; |
1718 | const MemRegion *Base; |
1719 | std::tie(args&: SValOffsets, args&: Base) = getElementRegionOffsetsWithBase(ER: R); |
1720 | const VarRegion *VR = dyn_cast<VarRegion>(Val: Base); |
1721 | if (!VR) |
1722 | return std::nullopt; |
1723 | |
1724 | assert(!SValOffsets.empty() && "getElementRegionOffsets guarantees the " |
1725 | "offsets vector is not empty." ); |
1726 | |
1727 | // Check if the containing array has an initialized value that we can trust. |
1728 | // We can trust a const value or a value of a global initializer in main(). |
1729 | const VarDecl *VD = VR->getDecl(); |
1730 | if (!VD->getType().isConstQualified() && |
1731 | !R->getElementType().isConstQualified() && |
1732 | (!B.isMainAnalysis() || !VD->hasGlobalStorage())) |
1733 | return std::nullopt; |
1734 | |
1735 | // Array's declaration should have `ConstantArrayType` type, because only this |
1736 | // type contains an array extent. It may happen that array type can be of |
1737 | // `IncompleteArrayType` type. To get the declaration of `ConstantArrayType` |
1738 | // type, we should find the declaration in the redeclarations chain that has |
1739 | // the initialization expression. |
1740 | // NOTE: `getAnyInitializer` has an out-parameter, which returns a new `VD` |
1741 | // from which an initializer is obtained. We replace current `VD` with the new |
1742 | // `VD`. If the return value of the function is null than `VD` won't be |
1743 | // replaced. |
1744 | const Expr *Init = VD->getAnyInitializer(D&: VD); |
1745 | // NOTE: If `Init` is non-null, then a new `VD` is non-null for sure. So check |
1746 | // `Init` for null only and don't worry about the replaced `VD`. |
1747 | if (!Init) |
1748 | return std::nullopt; |
1749 | |
1750 | // Array's declaration should have ConstantArrayType type, because only this |
1751 | // type contains an array extent. |
1752 | const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(T: VD->getType()); |
1753 | if (!CAT) |
1754 | return std::nullopt; |
1755 | |
1756 | // Get array extents. |
1757 | SmallVector<uint64_t, 2> Extents = getConstantArrayExtents(CAT); |
1758 | |
1759 | // The number of offsets should equal to the numbers of extents, |
1760 | // otherwise wrong type punning occurred. For instance: |
1761 | // int arr[1][2][3]; |
1762 | // auto ptr = (int(*)[42])arr; |
1763 | // auto x = ptr[4][2]; // UB |
1764 | // FIXME: Should return UndefinedVal. |
1765 | if (SValOffsets.size() != Extents.size()) |
1766 | return std::nullopt; |
1767 | |
1768 | SmallVector<uint64_t, 2> ConcreteOffsets; |
1769 | if (std::optional<SVal> V = convertOffsetsFromSvalToUnsigneds( |
1770 | SrcOffsets: SValOffsets, ArrayExtents: Extents, DstOffsets&: ConcreteOffsets)) |
1771 | return *V; |
1772 | |
1773 | // Handle InitListExpr. |
1774 | // Example: |
1775 | // const char arr[4][2] = { { 1, 2 }, { 3 }, 4, 5 }; |
1776 | if (const auto *ILE = dyn_cast<InitListExpr>(Val: Init)) |
1777 | return getSValFromInitListExpr(ILE, ConcreteOffsets, ElemT: R->getElementType()); |
1778 | |
1779 | // Handle StringLiteral. |
1780 | // Example: |
1781 | // const char arr[] = "abc"; |
1782 | if (const auto *SL = dyn_cast<StringLiteral>(Val: Init)) |
1783 | return getSValFromStringLiteral(SL, Offset: ConcreteOffsets.front(), |
1784 | ElemT: R->getElementType()); |
1785 | |
1786 | // FIXME: Handle CompoundLiteralExpr. |
1787 | |
1788 | return std::nullopt; |
1789 | } |
1790 | |
1791 | /// Returns an SVal, if possible, for the specified position of an |
1792 | /// initialization list. |
1793 | /// |
1794 | /// \param ILE The given initialization list. |
1795 | /// \param Offsets The array of unsigned offsets. E.g. for the expression |
1796 | /// `int x = arr[1][2][3];` an array should be { 1, 2, 3 }. |
1797 | /// \param ElemT The type of the result SVal expression. |
1798 | /// \return Optional SVal for the particular position in the initialization |
1799 | /// list. E.g. for the list `{{1, 2},[3, 4],{5, 6}, {}}` offsets: |
1800 | /// - {1, 1} returns SVal{4}, because it's the second position in the second |
1801 | /// sublist; |
1802 | /// - {3, 0} returns SVal{0}, because there's no explicit value at this |
1803 | /// position in the sublist. |
1804 | /// |
1805 | /// NOTE: Inorder to get a valid SVal, a caller shall guarantee valid offsets |
1806 | /// for the given initialization list. Otherwise SVal can be an equivalent to 0 |
1807 | /// or lead to assertion. |
1808 | std::optional<SVal> RegionStoreManager::getSValFromInitListExpr( |
1809 | const InitListExpr *ILE, const SmallVector<uint64_t, 2> &Offsets, |
1810 | QualType ElemT) { |
1811 | assert(ILE && "InitListExpr should not be null" ); |
1812 | |
1813 | for (uint64_t Offset : Offsets) { |
1814 | // C++20 [dcl.init.string] 9.4.2.1: |
1815 | // An array of ordinary character type [...] can be initialized by [...] |
1816 | // an appropriately-typed string-literal enclosed in braces. |
1817 | // Example: |
1818 | // const char arr[] = { "abc" }; |
1819 | if (ILE->isStringLiteralInit()) |
1820 | if (const auto *SL = dyn_cast<StringLiteral>(Val: ILE->getInit(Init: 0))) |
1821 | return getSValFromStringLiteral(SL, Offset, ElemT); |
1822 | |
1823 | // C++20 [expr.add] 9.4.17.5 (excerpt): |
1824 | // i-th array element is value-initialized for each k < i ≤ n, |
1825 | // where k is an expression-list size and n is an array extent. |
1826 | if (Offset >= ILE->getNumInits()) |
1827 | return svalBuilder.makeZeroVal(type: ElemT); |
1828 | |
1829 | const Expr *E = ILE->getInit(Init: Offset); |
1830 | const auto *IL = dyn_cast<InitListExpr>(Val: E); |
1831 | if (!IL) |
1832 | // Return a constant value, if it is presented. |
1833 | // FIXME: Support other SVals. |
1834 | return svalBuilder.getConstantVal(E); |
1835 | |
1836 | // Go to the nested initializer list. |
1837 | ILE = IL; |
1838 | } |
1839 | |
1840 | assert(ILE); |
1841 | |
1842 | // FIXME: Unhandeled InitListExpr sub-expression, possibly constructing an |
1843 | // enum? |
1844 | return std::nullopt; |
1845 | } |
1846 | |
1847 | /// Returns an SVal, if possible, for the specified position in a string |
1848 | /// literal. |
1849 | /// |
1850 | /// \param SL The given string literal. |
1851 | /// \param Offset The unsigned offset. E.g. for the expression |
1852 | /// `char x = str[42];` an offset should be 42. |
1853 | /// E.g. for the string "abc" offset: |
1854 | /// - 1 returns SVal{b}, because it's the second position in the string. |
1855 | /// - 42 returns SVal{0}, because there's no explicit value at this |
1856 | /// position in the string. |
1857 | /// \param ElemT The type of the result SVal expression. |
1858 | /// |
1859 | /// NOTE: We return `0` for every offset >= the literal length for array |
1860 | /// declarations, like: |
1861 | /// const char str[42] = "123"; // Literal length is 4. |
1862 | /// char c = str[41]; // Offset is 41. |
1863 | /// FIXME: Nevertheless, we can't do the same for pointer declaraions, like: |
1864 | /// const char * const str = "123"; // Literal length is 4. |
1865 | /// char c = str[41]; // Offset is 41. Returns `0`, but Undef |
1866 | /// // expected. |
1867 | /// It should be properly handled before reaching this point. |
1868 | /// The main problem is that we can't distinguish between these declarations, |
1869 | /// because in case of array we can get the Decl from VarRegion, but in case |
1870 | /// of pointer the region is a StringRegion, which doesn't contain a Decl. |
1871 | /// Possible solution could be passing an array extent along with the offset. |
1872 | SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL, |
1873 | uint64_t Offset, |
1874 | QualType ElemT) { |
1875 | assert(SL && "StringLiteral should not be null" ); |
1876 | // C++20 [dcl.init.string] 9.4.2.3: |
1877 | // If there are fewer initializers than there are array elements, each |
1878 | // element not explicitly initialized shall be zero-initialized [dcl.init]. |
1879 | uint32_t Code = (Offset >= SL->getLength()) ? 0 : SL->getCodeUnit(i: Offset); |
1880 | return svalBuilder.makeIntVal(integer: Code, type: ElemT); |
1881 | } |
1882 | |
1883 | static std::optional<SVal> getDerivedSymbolForBinding( |
1884 | RegionBindingsConstRef B, const TypedValueRegion *BaseRegion, |
1885 | const TypedValueRegion *SubReg, const ASTContext &Ctx, SValBuilder &SVB) { |
1886 | assert(BaseRegion); |
1887 | QualType BaseTy = BaseRegion->getValueType(); |
1888 | QualType Ty = SubReg->getValueType(); |
1889 | if (BaseTy->isScalarType() && Ty->isScalarType()) { |
1890 | if (Ctx.getTypeSizeInChars(T: BaseTy) >= Ctx.getTypeSizeInChars(T: Ty)) { |
1891 | if (const std::optional<SVal> &ParentValue = |
1892 | B.getDirectBinding(R: BaseRegion)) { |
1893 | if (SymbolRef ParentValueAsSym = ParentValue->getAsSymbol()) |
1894 | return SVB.getDerivedRegionValueSymbolVal(parentSymbol: ParentValueAsSym, region: SubReg); |
1895 | |
1896 | if (ParentValue->isUndef()) |
1897 | return UndefinedVal(); |
1898 | |
1899 | // Other cases: give up. We are indexing into a larger object |
1900 | // that has some value, but we don't know how to handle that yet. |
1901 | return UnknownVal(); |
1902 | } |
1903 | } |
1904 | } |
1905 | return std::nullopt; |
1906 | } |
1907 | |
1908 | SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, |
1909 | const ElementRegion* R) { |
1910 | // Check if the region has a binding. |
1911 | if (const std::optional<SVal> &V = B.getDirectBinding(R)) |
1912 | return *V; |
1913 | |
1914 | const MemRegion* superR = R->getSuperRegion(); |
1915 | |
1916 | // Check if the region is an element region of a string literal. |
1917 | if (const StringRegion *StrR = dyn_cast<StringRegion>(Val: superR)) { |
1918 | // FIXME: Handle loads from strings where the literal is treated as |
1919 | // an integer, e.g., *((unsigned int*)"hello"). Such loads are UB according |
1920 | // to C++20 7.2.1.11 [basic.lval]. |
1921 | QualType T = Ctx.getAsArrayType(T: StrR->getValueType())->getElementType(); |
1922 | if (!Ctx.hasSameUnqualifiedType(T1: T, T2: R->getElementType())) |
1923 | return UnknownVal(); |
1924 | if (const auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) { |
1925 | const llvm::APSInt &Idx = CI->getValue(); |
1926 | if (Idx < 0) |
1927 | return UndefinedVal(); |
1928 | const StringLiteral *SL = StrR->getStringLiteral(); |
1929 | return getSValFromStringLiteral(SL, Offset: Idx.getZExtValue(), ElemT: T); |
1930 | } |
1931 | } else if (isa<ElementRegion, VarRegion>(Val: superR)) { |
1932 | if (std::optional<SVal> V = getConstantValFromConstArrayInitializer(B, R)) |
1933 | return *V; |
1934 | } |
1935 | |
1936 | // Check for loads from a code text region. For such loads, just give up. |
1937 | if (isa<CodeTextRegion>(Val: superR)) |
1938 | return UnknownVal(); |
1939 | |
1940 | // Handle the case where we are indexing into a larger scalar object. |
1941 | // For example, this handles: |
1942 | // int x = ... |
1943 | // char *y = &x; |
1944 | // return *y; |
1945 | // FIXME: This is a hack, and doesn't do anything really intelligent yet. |
1946 | const RegionRawOffset &O = R->getAsArrayOffset(); |
1947 | |
1948 | // If we cannot reason about the offset, return an unknown value. |
1949 | if (!O.getRegion()) |
1950 | return UnknownVal(); |
1951 | |
1952 | if (const TypedValueRegion *baseR = dyn_cast<TypedValueRegion>(Val: O.getRegion())) |
1953 | if (auto V = getDerivedSymbolForBinding(B, baseR, R, Ctx, svalBuilder)) |
1954 | return *V; |
1955 | |
1956 | return getBindingForFieldOrElementCommon(B, R, R->getElementType()); |
1957 | } |
1958 | |
1959 | SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B, |
1960 | const FieldRegion* R) { |
1961 | |
1962 | // Check if the region has a binding. |
1963 | if (const std::optional<SVal> &V = B.getDirectBinding(R)) |
1964 | return *V; |
1965 | |
1966 | // If the containing record was initialized, try to get its constant value. |
1967 | const FieldDecl *FD = R->getDecl(); |
1968 | QualType Ty = FD->getType(); |
1969 | const MemRegion* superR = R->getSuperRegion(); |
1970 | if (const auto *VR = dyn_cast<VarRegion>(Val: superR)) { |
1971 | const VarDecl *VD = VR->getDecl(); |
1972 | QualType RecordVarTy = VD->getType(); |
1973 | unsigned Index = FD->getFieldIndex(); |
1974 | // Either the record variable or the field has an initializer that we can |
1975 | // trust. We trust initializers of constants and, additionally, respect |
1976 | // initializers of globals when analyzing main(). |
1977 | if (RecordVarTy.isConstQualified() || Ty.isConstQualified() || |
1978 | (B.isMainAnalysis() && VD->hasGlobalStorage())) |
1979 | if (const Expr *Init = VD->getAnyInitializer()) |
1980 | if (const auto *InitList = dyn_cast<InitListExpr>(Val: Init)) { |
1981 | if (Index < InitList->getNumInits()) { |
1982 | if (const Expr *FieldInit = InitList->getInit(Init: Index)) |
1983 | if (std::optional<SVal> V = svalBuilder.getConstantVal(E: FieldInit)) |
1984 | return *V; |
1985 | } else { |
1986 | return svalBuilder.makeZeroVal(type: Ty); |
1987 | } |
1988 | } |
1989 | } |
1990 | |
1991 | // Handle the case where we are accessing into a larger scalar object. |
1992 | // For example, this handles: |
1993 | // struct header { |
1994 | // unsigned a : 1; |
1995 | // unsigned b : 1; |
1996 | // }; |
1997 | // struct parse_t { |
1998 | // unsigned bits0 : 1; |
1999 | // unsigned bits2 : 2; // <-- header |
2000 | // unsigned bits4 : 4; |
2001 | // }; |
2002 | // int parse(parse_t *p) { |
2003 | // unsigned copy = p->bits2; |
2004 | // header *bits = (header *)© |
2005 | // return bits->b; <-- here |
2006 | // } |
2007 | if (const auto *Base = dyn_cast<TypedValueRegion>(Val: R->getBaseRegion())) |
2008 | if (auto V = getDerivedSymbolForBinding(B, BaseRegion: Base, SubReg: R, Ctx, SVB&: svalBuilder)) |
2009 | return *V; |
2010 | |
2011 | return getBindingForFieldOrElementCommon(B, R, Ty); |
2012 | } |
2013 | |
2014 | std::optional<SVal> RegionStoreManager::getBindingForDerivedDefaultValue( |
2015 | RegionBindingsConstRef B, const MemRegion *superR, |
2016 | const TypedValueRegion *R, QualType Ty) { |
2017 | |
2018 | if (const std::optional<SVal> &D = B.getDefaultBinding(R: superR)) { |
2019 | SVal val = *D; |
2020 | if (SymbolRef parentSym = val.getAsSymbol()) |
2021 | return svalBuilder.getDerivedRegionValueSymbolVal(parentSymbol: parentSym, region: R); |
2022 | |
2023 | if (val.isZeroConstant()) |
2024 | return svalBuilder.makeZeroVal(type: Ty); |
2025 | |
2026 | if (val.isUnknownOrUndef()) |
2027 | return val; |
2028 | |
2029 | // Lazy bindings are usually handled through getExistingLazyBinding(). |
2030 | // We should unify these two code paths at some point. |
2031 | if (isa<nonloc::LazyCompoundVal, nonloc::CompoundVal>(Val: val)) |
2032 | return val; |
2033 | |
2034 | llvm_unreachable("Unknown default value" ); |
2035 | } |
2036 | |
2037 | return std::nullopt; |
2038 | } |
2039 | |
2040 | SVal RegionStoreManager::getLazyBinding(const SubRegion *LazyBindingRegion, |
2041 | RegionBindingsRef LazyBinding) { |
2042 | SVal Result; |
2043 | if (const ElementRegion *ER = dyn_cast<ElementRegion>(Val: LazyBindingRegion)) |
2044 | Result = getBindingForElement(B: LazyBinding, R: ER); |
2045 | else |
2046 | Result = getBindingForField(B: LazyBinding, |
2047 | R: cast<FieldRegion>(Val: LazyBindingRegion)); |
2048 | |
2049 | // FIXME: This is a hack to deal with RegionStore's inability to distinguish a |
2050 | // default value for /part/ of an aggregate from a default value for the |
2051 | // /entire/ aggregate. The most common case of this is when struct Outer |
2052 | // has as its first member a struct Inner, which is copied in from a stack |
2053 | // variable. In this case, even if the Outer's default value is symbolic, 0, |
2054 | // or unknown, it gets overridden by the Inner's default value of undefined. |
2055 | // |
2056 | // This is a general problem -- if the Inner is zero-initialized, the Outer |
2057 | // will now look zero-initialized. The proper way to solve this is with a |
2058 | // new version of RegionStore that tracks the extent of a binding as well |
2059 | // as the offset. |
2060 | // |
2061 | // This hack only takes care of the undefined case because that can very |
2062 | // quickly result in a warning. |
2063 | if (Result.isUndef()) |
2064 | Result = UnknownVal(); |
2065 | |
2066 | return Result; |
2067 | } |
2068 | |
2069 | SVal |
2070 | RegionStoreManager::getBindingForFieldOrElementCommon(RegionBindingsConstRef B, |
2071 | const TypedValueRegion *R, |
2072 | QualType Ty) { |
2073 | |
2074 | // At this point we have already checked in either getBindingForElement or |
2075 | // getBindingForField if 'R' has a direct binding. |
2076 | |
2077 | // Lazy binding? |
2078 | Store lazyBindingStore = nullptr; |
2079 | const SubRegion *lazyBindingRegion = nullptr; |
2080 | std::tie(args&: lazyBindingStore, args&: lazyBindingRegion) = findLazyBinding(B, R, originalRegion: R); |
2081 | if (lazyBindingRegion) |
2082 | return getLazyBinding(LazyBindingRegion: lazyBindingRegion, |
2083 | LazyBinding: getRegionBindings(store: lazyBindingStore)); |
2084 | |
2085 | // Record whether or not we see a symbolic index. That can completely |
2086 | // be out of scope of our lookup. |
2087 | bool hasSymbolicIndex = false; |
2088 | |
2089 | // FIXME: This is a hack to deal with RegionStore's inability to distinguish a |
2090 | // default value for /part/ of an aggregate from a default value for the |
2091 | // /entire/ aggregate. The most common case of this is when struct Outer |
2092 | // has as its first member a struct Inner, which is copied in from a stack |
2093 | // variable. In this case, even if the Outer's default value is symbolic, 0, |
2094 | // or unknown, it gets overridden by the Inner's default value of undefined. |
2095 | // |
2096 | // This is a general problem -- if the Inner is zero-initialized, the Outer |
2097 | // will now look zero-initialized. The proper way to solve this is with a |
2098 | // new version of RegionStore that tracks the extent of a binding as well |
2099 | // as the offset. |
2100 | // |
2101 | // This hack only takes care of the undefined case because that can very |
2102 | // quickly result in a warning. |
2103 | bool hasPartialLazyBinding = false; |
2104 | |
2105 | const SubRegion *SR = R; |
2106 | while (SR) { |
2107 | const MemRegion *Base = SR->getSuperRegion(); |
2108 | if (std::optional<SVal> D = |
2109 | getBindingForDerivedDefaultValue(B, superR: Base, R, Ty)) { |
2110 | if (D->getAs<nonloc::LazyCompoundVal>()) { |
2111 | hasPartialLazyBinding = true; |
2112 | break; |
2113 | } |
2114 | |
2115 | return *D; |
2116 | } |
2117 | |
2118 | if (const ElementRegion *ER = dyn_cast<ElementRegion>(Val: Base)) { |
2119 | NonLoc index = ER->getIndex(); |
2120 | if (!index.isConstant()) |
2121 | hasSymbolicIndex = true; |
2122 | } |
2123 | |
2124 | // If our super region is a field or element itself, walk up the region |
2125 | // hierarchy to see if there is a default value installed in an ancestor. |
2126 | SR = dyn_cast<SubRegion>(Val: Base); |
2127 | } |
2128 | |
2129 | if (R->hasStackNonParametersStorage()) { |
2130 | if (isa<ElementRegion>(Val: R)) { |
2131 | // Currently we don't reason specially about Clang-style vectors. Check |
2132 | // if superR is a vector and if so return Unknown. |
2133 | if (const TypedValueRegion *typedSuperR = |
2134 | dyn_cast<TypedValueRegion>(Val: R->getSuperRegion())) { |
2135 | if (typedSuperR->getValueType()->isVectorType()) |
2136 | return UnknownVal(); |
2137 | } |
2138 | } |
2139 | |
2140 | // FIXME: We also need to take ElementRegions with symbolic indexes into |
2141 | // account. This case handles both directly accessing an ElementRegion |
2142 | // with a symbolic offset, but also fields within an element with |
2143 | // a symbolic offset. |
2144 | if (hasSymbolicIndex) |
2145 | return UnknownVal(); |
2146 | |
2147 | // Additionally allow introspection of a block's internal layout. |
2148 | // Try to get direct binding if all other attempts failed thus far. |
2149 | // Else, return UndefinedVal() |
2150 | if (!hasPartialLazyBinding && !isa<BlockDataRegion>(Val: R->getBaseRegion())) { |
2151 | if (const std::optional<SVal> &V = B.getDefaultBinding(R)) |
2152 | return *V; |
2153 | return UndefinedVal(); |
2154 | } |
2155 | } |
2156 | |
2157 | // All other values are symbolic. |
2158 | return svalBuilder.getRegionValueSymbolVal(region: R); |
2159 | } |
2160 | |
2161 | SVal RegionStoreManager::getBindingForObjCIvar(RegionBindingsConstRef B, |
2162 | const ObjCIvarRegion* R) { |
2163 | // Check if the region has a binding. |
2164 | if (const std::optional<SVal> &V = B.getDirectBinding(R)) |
2165 | return *V; |
2166 | |
2167 | const MemRegion *superR = R->getSuperRegion(); |
2168 | |
2169 | // Check if the super region has a default binding. |
2170 | if (const std::optional<SVal> &V = B.getDefaultBinding(R: superR)) { |
2171 | if (SymbolRef parentSym = V->getAsSymbol()) |
2172 | return svalBuilder.getDerivedRegionValueSymbolVal(parentSymbol: parentSym, region: R); |
2173 | |
2174 | // Other cases: give up. |
2175 | return UnknownVal(); |
2176 | } |
2177 | |
2178 | return getBindingForLazySymbol(R); |
2179 | } |
2180 | |
2181 | SVal RegionStoreManager::getBindingForVar(RegionBindingsConstRef B, |
2182 | const VarRegion *R) { |
2183 | |
2184 | // Check if the region has a binding. |
2185 | if (std::optional<SVal> V = B.getDirectBinding(R)) |
2186 | return *V; |
2187 | |
2188 | if (std::optional<SVal> V = B.getDefaultBinding(R)) |
2189 | return *V; |
2190 | |
2191 | // Lazily derive a value for the VarRegion. |
2192 | const VarDecl *VD = R->getDecl(); |
2193 | const MemSpaceRegion *MS = R->getMemorySpace(); |
2194 | |
2195 | // Arguments are always symbolic. |
2196 | if (isa<StackArgumentsSpaceRegion>(Val: MS)) |
2197 | return svalBuilder.getRegionValueSymbolVal(region: R); |
2198 | |
2199 | // Is 'VD' declared constant? If so, retrieve the constant value. |
2200 | if (VD->getType().isConstQualified()) { |
2201 | if (const Expr *Init = VD->getAnyInitializer()) { |
2202 | if (std::optional<SVal> V = svalBuilder.getConstantVal(E: Init)) |
2203 | return *V; |
2204 | |
2205 | // If the variable is const qualified and has an initializer but |
2206 | // we couldn't evaluate initializer to a value, treat the value as |
2207 | // unknown. |
2208 | return UnknownVal(); |
2209 | } |
2210 | } |
2211 | |
2212 | // This must come after the check for constants because closure-captured |
2213 | // constant variables may appear in UnknownSpaceRegion. |
2214 | if (isa<UnknownSpaceRegion>(Val: MS)) |
2215 | return svalBuilder.getRegionValueSymbolVal(region: R); |
2216 | |
2217 | if (isa<GlobalsSpaceRegion>(Val: MS)) { |
2218 | QualType T = VD->getType(); |
2219 | |
2220 | // If we're in main(), then global initializers have not become stale yet. |
2221 | if (B.isMainAnalysis()) |
2222 | if (const Expr *Init = VD->getAnyInitializer()) |
2223 | if (std::optional<SVal> V = svalBuilder.getConstantVal(E: Init)) |
2224 | return *V; |
2225 | |
2226 | // Function-scoped static variables are default-initialized to 0; if they |
2227 | // have an initializer, it would have been processed by now. |
2228 | // FIXME: This is only true when we're starting analysis from main(). |
2229 | // We're losing a lot of coverage here. |
2230 | if (isa<StaticGlobalSpaceRegion>(Val: MS)) |
2231 | return svalBuilder.makeZeroVal(type: T); |
2232 | |
2233 | if (std::optional<SVal> V = getBindingForDerivedDefaultValue(B, superR: MS, R, Ty: T)) { |
2234 | assert(!V->getAs<nonloc::LazyCompoundVal>()); |
2235 | return *V; |
2236 | } |
2237 | |
2238 | return svalBuilder.getRegionValueSymbolVal(region: R); |
2239 | } |
2240 | |
2241 | return UndefinedVal(); |
2242 | } |
2243 | |
2244 | SVal RegionStoreManager::getBindingForLazySymbol(const TypedValueRegion *R) { |
2245 | // All other values are symbolic. |
2246 | return svalBuilder.getRegionValueSymbolVal(region: R); |
2247 | } |
2248 | |
2249 | const RegionStoreManager::SValListTy & |
2250 | RegionStoreManager::getInterestingValues(nonloc::LazyCompoundVal LCV) { |
2251 | // First, check the cache. |
2252 | LazyBindingsMapTy::iterator I = LazyBindingsMap.find(Val: LCV.getCVData()); |
2253 | if (I != LazyBindingsMap.end()) |
2254 | return I->second; |
2255 | |
2256 | // If we don't have a list of values cached, start constructing it. |
2257 | SValListTy List; |
2258 | |
2259 | const SubRegion *LazyR = LCV.getRegion(); |
2260 | RegionBindingsRef B = getRegionBindings(store: LCV.getStore()); |
2261 | |
2262 | // If this region had /no/ bindings at the time, there are no interesting |
2263 | // values to return. |
2264 | const ClusterBindings *Cluster = B.lookup(K: LazyR->getBaseRegion()); |
2265 | if (!Cluster) |
2266 | return (LazyBindingsMap[LCV.getCVData()] = std::move(List)); |
2267 | |
2268 | SmallVector<BindingPair, 32> Bindings; |
2269 | collectSubRegionBindings(Bindings, SVB&: svalBuilder, Cluster: *Cluster, Top: LazyR, |
2270 | /*IncludeAllDefaultBindings=*/true); |
2271 | for (SVal V : llvm::make_second_range(c&: Bindings)) { |
2272 | if (V.isUnknownOrUndef() || V.isConstant()) |
2273 | continue; |
2274 | |
2275 | if (auto InnerLCV = V.getAs<nonloc::LazyCompoundVal>()) { |
2276 | const SValListTy &InnerList = getInterestingValues(LCV: *InnerLCV); |
2277 | List.insert(position: List.end(), first: InnerList.begin(), last: InnerList.end()); |
2278 | } |
2279 | |
2280 | List.push_back(x: V); |
2281 | } |
2282 | |
2283 | return (LazyBindingsMap[LCV.getCVData()] = std::move(List)); |
2284 | } |
2285 | |
2286 | NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B, |
2287 | const TypedValueRegion *R) { |
2288 | if (std::optional<nonloc::LazyCompoundVal> V = |
2289 | getExistingLazyBinding(SVB&: svalBuilder, B, R, AllowSubregionBindings: false)) |
2290 | return *V; |
2291 | |
2292 | return svalBuilder.makeLazyCompoundVal(store: StoreRef(B.asStore(), *this), region: R); |
2293 | } |
2294 | |
2295 | static bool isRecordEmpty(const RecordDecl *RD) { |
2296 | if (!RD->field_empty()) |
2297 | return false; |
2298 | if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(Val: RD)) |
2299 | return CRD->getNumBases() == 0; |
2300 | return true; |
2301 | } |
2302 | |
2303 | SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B, |
2304 | const TypedValueRegion *R) { |
2305 | const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl(); |
2306 | if (!RD->getDefinition() || isRecordEmpty(RD)) |
2307 | return UnknownVal(); |
2308 | |
2309 | return createLazyBinding(B, R); |
2310 | } |
2311 | |
2312 | SVal RegionStoreManager::getBindingForArray(RegionBindingsConstRef B, |
2313 | const TypedValueRegion *R) { |
2314 | assert(Ctx.getAsConstantArrayType(R->getValueType()) && |
2315 | "Only constant array types can have compound bindings." ); |
2316 | |
2317 | return createLazyBinding(B, R); |
2318 | } |
2319 | |
2320 | bool RegionStoreManager::includedInBindings(Store store, |
2321 | const MemRegion *region) const { |
2322 | RegionBindingsRef B = getRegionBindings(store); |
2323 | region = region->getBaseRegion(); |
2324 | |
2325 | // Quick path: if the base is the head of a cluster, the region is live. |
2326 | if (B.lookup(K: region)) |
2327 | return true; |
2328 | |
2329 | // Slow path: if the region is the VALUE of any binding, it is live. |
2330 | for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end(); RI != RE; ++RI) { |
2331 | const ClusterBindings &Cluster = RI.getData(); |
2332 | for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end(); |
2333 | CI != CE; ++CI) { |
2334 | SVal D = CI.getData(); |
2335 | if (const MemRegion *R = D.getAsRegion()) |
2336 | if (R->getBaseRegion() == region) |
2337 | return true; |
2338 | } |
2339 | } |
2340 | |
2341 | return false; |
2342 | } |
2343 | |
2344 | //===----------------------------------------------------------------------===// |
2345 | // Binding values to regions. |
2346 | //===----------------------------------------------------------------------===// |
2347 | |
2348 | StoreRef RegionStoreManager::killBinding(Store ST, Loc L) { |
2349 | if (std::optional<loc::MemRegionVal> LV = L.getAs<loc::MemRegionVal>()) |
2350 | if (const MemRegion* R = LV->getRegion()) |
2351 | return StoreRef(getRegionBindings(store: ST).removeBinding(R) |
2352 | .asImmutableMap() |
2353 | .getRootWithoutRetain(), |
2354 | *this); |
2355 | |
2356 | return StoreRef(ST, *this); |
2357 | } |
2358 | |
2359 | RegionBindingsRef |
2360 | RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) { |
2361 | // We only care about region locations. |
2362 | auto MemRegVal = L.getAs<loc::MemRegionVal>(); |
2363 | if (!MemRegVal) |
2364 | return B; |
2365 | |
2366 | const MemRegion *R = MemRegVal->getRegion(); |
2367 | |
2368 | // Check if the region is a struct region. |
2369 | if (const TypedValueRegion* TR = dyn_cast<TypedValueRegion>(Val: R)) { |
2370 | QualType Ty = TR->getValueType(); |
2371 | if (Ty->isArrayType()) |
2372 | return bindArray(B, R: TR, V); |
2373 | if (Ty->isStructureOrClassType()) |
2374 | return bindStruct(B, R: TR, V); |
2375 | if (Ty->isVectorType()) |
2376 | return bindVector(B, R: TR, V); |
2377 | if (Ty->isUnionType()) |
2378 | return bindAggregate(B, R: TR, DefaultVal: V); |
2379 | } |
2380 | |
2381 | // Binding directly to a symbolic region should be treated as binding |
2382 | // to element 0. |
2383 | if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Val: R)) |
2384 | R = GetElementZeroRegion(R: SR, T: SR->getPointeeStaticType()); |
2385 | |
2386 | assert((!isa<CXXThisRegion>(R) || !B.lookup(R)) && |
2387 | "'this' pointer is not an l-value and is not assignable" ); |
2388 | |
2389 | // Clear out bindings that may overlap with this binding. |
2390 | RegionBindingsRef NewB = removeSubRegionBindings(B, Top: cast<SubRegion>(Val: R)); |
2391 | |
2392 | // LazyCompoundVals should be always bound as 'default' bindings. |
2393 | auto KeyKind = isa<nonloc::LazyCompoundVal>(Val: V) ? BindingKey::Default |
2394 | : BindingKey::Direct; |
2395 | return NewB.addBinding(K: BindingKey::Make(R, k: KeyKind), V); |
2396 | } |
2397 | |
2398 | RegionBindingsRef |
2399 | RegionStoreManager::setImplicitDefaultValue(RegionBindingsConstRef B, |
2400 | const MemRegion *R, |
2401 | QualType T) { |
2402 | SVal V; |
2403 | |
2404 | if (Loc::isLocType(T)) |
2405 | V = svalBuilder.makeNullWithType(type: T); |
2406 | else if (T->isIntegralOrEnumerationType()) |
2407 | V = svalBuilder.makeZeroVal(type: T); |
2408 | else if (T->isStructureOrClassType() || T->isArrayType()) { |
2409 | // Set the default value to a zero constant when it is a structure |
2410 | // or array. The type doesn't really matter. |
2411 | V = svalBuilder.makeZeroVal(type: Ctx.IntTy); |
2412 | } |
2413 | else { |
2414 | // We can't represent values of this type, but we still need to set a value |
2415 | // to record that the region has been initialized. |
2416 | // If this assertion ever fires, a new case should be added above -- we |
2417 | // should know how to default-initialize any value we can symbolicate. |
2418 | assert(!SymbolManager::canSymbolicate(T) && "This type is representable" ); |
2419 | V = UnknownVal(); |
2420 | } |
2421 | |
2422 | return B.addBinding(R, k: BindingKey::Default, V); |
2423 | } |
2424 | |
2425 | std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallArray( |
2426 | RegionBindingsConstRef B, const TypedValueRegion *R, const ArrayType *AT, |
2427 | nonloc::LazyCompoundVal LCV) { |
2428 | |
2429 | auto CAT = dyn_cast<ConstantArrayType>(Val: AT); |
2430 | |
2431 | // If we don't know the size, create a lazyCompoundVal instead. |
2432 | if (!CAT) |
2433 | return std::nullopt; |
2434 | |
2435 | QualType Ty = CAT->getElementType(); |
2436 | if (!(Ty->isScalarType() || Ty->isReferenceType())) |
2437 | return std::nullopt; |
2438 | |
2439 | // If the array is too big, create a LCV instead. |
2440 | uint64_t ArrSize = CAT->getLimitedSize(); |
2441 | if (ArrSize > SmallArrayLimit) |
2442 | return std::nullopt; |
2443 | |
2444 | RegionBindingsRef NewB = B; |
2445 | |
2446 | for (uint64_t i = 0; i < ArrSize; ++i) { |
2447 | auto Idx = svalBuilder.makeArrayIndex(idx: i); |
2448 | const ElementRegion *SrcER = |
2449 | MRMgr.getElementRegion(elementType: Ty, Idx, superRegion: LCV.getRegion(), Ctx); |
2450 | SVal V = getBindingForElement(B: getRegionBindings(store: LCV.getStore()), R: SrcER); |
2451 | |
2452 | const ElementRegion *DstER = MRMgr.getElementRegion(elementType: Ty, Idx, superRegion: R, Ctx); |
2453 | NewB = bind(B: NewB, L: loc::MemRegionVal(DstER), V); |
2454 | } |
2455 | |
2456 | return NewB; |
2457 | } |
2458 | |
2459 | RegionBindingsRef |
2460 | RegionStoreManager::bindArray(RegionBindingsConstRef B, |
2461 | const TypedValueRegion* R, |
2462 | SVal Init) { |
2463 | |
2464 | const ArrayType *AT =cast<ArrayType>(Val: Ctx.getCanonicalType(T: R->getValueType())); |
2465 | QualType ElementTy = AT->getElementType(); |
2466 | std::optional<uint64_t> Size; |
2467 | |
2468 | if (const ConstantArrayType* CAT = dyn_cast<ConstantArrayType>(Val: AT)) |
2469 | Size = CAT->getZExtSize(); |
2470 | |
2471 | // Check if the init expr is a literal. If so, bind the rvalue instead. |
2472 | // FIXME: It's not responsibility of the Store to transform this lvalue |
2473 | // to rvalue. ExprEngine or maybe even CFG should do this before binding. |
2474 | if (std::optional<loc::MemRegionVal> MRV = Init.getAs<loc::MemRegionVal>()) { |
2475 | SVal V = getBinding(S: B.asStore(), L: *MRV, T: R->getValueType()); |
2476 | return bindAggregate(B, R, DefaultVal: V); |
2477 | } |
2478 | |
2479 | // Handle lazy compound values. |
2480 | if (std::optional<nonloc::LazyCompoundVal> LCV = |
2481 | Init.getAs<nonloc::LazyCompoundVal>()) { |
2482 | if (std::optional<RegionBindingsRef> NewB = |
2483 | tryBindSmallArray(B, R, AT, LCV: *LCV)) |
2484 | return *NewB; |
2485 | |
2486 | return bindAggregate(B, R, DefaultVal: Init); |
2487 | } |
2488 | |
2489 | if (Init.isUnknown()) |
2490 | return bindAggregate(B, R, DefaultVal: UnknownVal()); |
2491 | |
2492 | // Remaining case: explicit compound values. |
2493 | const nonloc::CompoundVal& CV = Init.castAs<nonloc::CompoundVal>(); |
2494 | nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); |
2495 | uint64_t i = 0; |
2496 | |
2497 | RegionBindingsRef NewB(B); |
2498 | |
2499 | for (; Size ? i < *Size : true; ++i, ++VI) { |
2500 | // The init list might be shorter than the array length. |
2501 | if (VI == VE) |
2502 | break; |
2503 | |
2504 | NonLoc Idx = svalBuilder.makeArrayIndex(idx: i); |
2505 | const ElementRegion *ER = MRMgr.getElementRegion(elementType: ElementTy, Idx, superRegion: R, Ctx); |
2506 | |
2507 | if (ElementTy->isStructureOrClassType()) |
2508 | NewB = bindStruct(NewB, ER, *VI); |
2509 | else if (ElementTy->isArrayType()) |
2510 | NewB = bindArray(NewB, ER, *VI); |
2511 | else |
2512 | NewB = bind(B: NewB, L: loc::MemRegionVal(ER), V: *VI); |
2513 | } |
2514 | |
2515 | // If the init list is shorter than the array length (or the array has |
2516 | // variable length), set the array default value. Values that are already set |
2517 | // are not overwritten. |
2518 | if (!Size || i < *Size) |
2519 | NewB = setImplicitDefaultValue(B: NewB, R, T: ElementTy); |
2520 | |
2521 | return NewB; |
2522 | } |
2523 | |
2524 | RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B, |
2525 | const TypedValueRegion* R, |
2526 | SVal V) { |
2527 | QualType T = R->getValueType(); |
2528 | const VectorType *VT = T->castAs<VectorType>(); // Use castAs for typedefs. |
2529 | |
2530 | // Handle lazy compound values and symbolic values. |
2531 | if (isa<nonloc::LazyCompoundVal, nonloc::SymbolVal>(Val: V)) |
2532 | return bindAggregate(B, R, DefaultVal: V); |
2533 | |
2534 | // We may get non-CompoundVal accidentally due to imprecise cast logic or |
2535 | // that we are binding symbolic struct value. Kill the field values, and if |
2536 | // the value is symbolic go and bind it as a "default" binding. |
2537 | if (!isa<nonloc::CompoundVal>(Val: V)) { |
2538 | return bindAggregate(B, R, DefaultVal: UnknownVal()); |
2539 | } |
2540 | |
2541 | QualType ElemType = VT->getElementType(); |
2542 | nonloc::CompoundVal CV = V.castAs<nonloc::CompoundVal>(); |
2543 | nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); |
2544 | unsigned index = 0, numElements = VT->getNumElements(); |
2545 | RegionBindingsRef NewB(B); |
2546 | |
2547 | for ( ; index != numElements ; ++index) { |
2548 | if (VI == VE) |
2549 | break; |
2550 | |
2551 | NonLoc Idx = svalBuilder.makeArrayIndex(idx: index); |
2552 | const ElementRegion *ER = MRMgr.getElementRegion(elementType: ElemType, Idx, superRegion: R, Ctx); |
2553 | |
2554 | if (ElemType->isArrayType()) |
2555 | NewB = bindArray(NewB, ER, *VI); |
2556 | else if (ElemType->isStructureOrClassType()) |
2557 | NewB = bindStruct(NewB, ER, *VI); |
2558 | else |
2559 | NewB = bind(B: NewB, L: loc::MemRegionVal(ER), V: *VI); |
2560 | } |
2561 | return NewB; |
2562 | } |
2563 | |
2564 | std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallStruct( |
2565 | RegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD, |
2566 | nonloc::LazyCompoundVal LCV) { |
2567 | FieldVector Fields; |
2568 | |
2569 | if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(Val: RD)) |
2570 | if (Class->getNumBases() != 0 || Class->getNumVBases() != 0) |
2571 | return std::nullopt; |
2572 | |
2573 | for (const auto *FD : RD->fields()) { |
2574 | if (FD->isUnnamedBitField()) |
2575 | continue; |
2576 | |
2577 | // If there are too many fields, or if any of the fields are aggregates, |
2578 | // just use the LCV as a default binding. |
2579 | if (Fields.size() == SmallStructLimit) |
2580 | return std::nullopt; |
2581 | |
2582 | QualType Ty = FD->getType(); |
2583 | |
2584 | // Zero length arrays are basically no-ops, so we also ignore them here. |
2585 | if (Ty->isConstantArrayType() && |
2586 | Ctx.getConstantArrayElementCount(CA: Ctx.getAsConstantArrayType(T: Ty)) == 0) |
2587 | continue; |
2588 | |
2589 | if (!(Ty->isScalarType() || Ty->isReferenceType())) |
2590 | return std::nullopt; |
2591 | |
2592 | Fields.push_back(Elt: FD); |
2593 | } |
2594 | |
2595 | RegionBindingsRef NewB = B; |
2596 | |
2597 | for (const FieldDecl *Field : Fields) { |
2598 | const FieldRegion *SourceFR = MRMgr.getFieldRegion(fd: Field, superRegion: LCV.getRegion()); |
2599 | SVal V = getBindingForField(B: getRegionBindings(store: LCV.getStore()), R: SourceFR); |
2600 | |
2601 | const FieldRegion *DestFR = MRMgr.getFieldRegion(fd: Field, superRegion: R); |
2602 | NewB = bind(B: NewB, L: loc::MemRegionVal(DestFR), V); |
2603 | } |
2604 | |
2605 | return NewB; |
2606 | } |
2607 | |
2608 | RegionBindingsRef RegionStoreManager::bindStruct(RegionBindingsConstRef B, |
2609 | const TypedValueRegion *R, |
2610 | SVal V) { |
2611 | QualType T = R->getValueType(); |
2612 | assert(T->isStructureOrClassType()); |
2613 | |
2614 | const RecordType* RT = T->castAs<RecordType>(); |
2615 | const RecordDecl *RD = RT->getDecl(); |
2616 | |
2617 | if (!RD->isCompleteDefinition()) |
2618 | return B; |
2619 | |
2620 | // Handle lazy compound values and symbolic values. |
2621 | if (std::optional<nonloc::LazyCompoundVal> LCV = |
2622 | V.getAs<nonloc::LazyCompoundVal>()) { |
2623 | if (std::optional<RegionBindingsRef> NewB = |
2624 | tryBindSmallStruct(B, R, RD, LCV: *LCV)) |
2625 | return *NewB; |
2626 | return bindAggregate(B, R, DefaultVal: V); |
2627 | } |
2628 | if (isa<nonloc::SymbolVal>(Val: V)) |
2629 | return bindAggregate(B, R, DefaultVal: V); |
2630 | |
2631 | // We may get non-CompoundVal accidentally due to imprecise cast logic or |
2632 | // that we are binding symbolic struct value. Kill the field values, and if |
2633 | // the value is symbolic go and bind it as a "default" binding. |
2634 | if (V.isUnknown() || !isa<nonloc::CompoundVal>(Val: V)) |
2635 | return bindAggregate(B, R, DefaultVal: UnknownVal()); |
2636 | |
2637 | // The raw CompoundVal is essentially a symbolic InitListExpr: an (immutable) |
2638 | // list of other values. It appears pretty much only when there's an actual |
2639 | // initializer list expression in the program, and the analyzer tries to |
2640 | // unwrap it as soon as possible. |
2641 | // This code is where such unwrap happens: when the compound value is put into |
2642 | // the object that it was supposed to initialize (it's an *initializer* list, |
2643 | // after all), instead of binding the whole value to the whole object, we bind |
2644 | // sub-values to sub-objects. Sub-values may themselves be compound values, |
2645 | // and in this case the procedure becomes recursive. |
2646 | // FIXME: The annoying part about compound values is that they don't carry |
2647 | // any sort of information about which value corresponds to which sub-object. |
2648 | // It's simply a list of values in the middle of nowhere; we expect to match |
2649 | // them to sub-objects, essentially, "by index": first value binds to |
2650 | // the first field, second value binds to the second field, etc. |
2651 | // It would have been much safer to organize non-lazy compound values as |
2652 | // a mapping from fields/bases to values. |
2653 | const nonloc::CompoundVal& CV = V.castAs<nonloc::CompoundVal>(); |
2654 | nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); |
2655 | |
2656 | RegionBindingsRef NewB(B); |
2657 | |
2658 | // In C++17 aggregates may have base classes, handle those as well. |
2659 | // They appear before fields in the initializer list / compound value. |
2660 | if (const auto *CRD = dyn_cast<CXXRecordDecl>(Val: RD)) { |
2661 | // If the object was constructed with a constructor, its value is a |
2662 | // LazyCompoundVal. If it's a raw CompoundVal, it means that we're |
2663 | // performing aggregate initialization. The only exception from this |
2664 | // rule is sending an Objective-C++ message that returns a C++ object |
2665 | // to a nil receiver; in this case the semantics is to return a |
2666 | // zero-initialized object even if it's a C++ object that doesn't have |
2667 | // this sort of constructor; the CompoundVal is empty in this case. |
2668 | assert((CRD->isAggregate() || (Ctx.getLangOpts().ObjC && VI == VE)) && |
2669 | "Non-aggregates are constructed with a constructor!" ); |
2670 | |
2671 | for (const auto &B : CRD->bases()) { |
2672 | // (Multiple inheritance is fine though.) |
2673 | assert(!B.isVirtual() && "Aggregates cannot have virtual base classes!" ); |
2674 | |
2675 | if (VI == VE) |
2676 | break; |
2677 | |
2678 | QualType BTy = B.getType(); |
2679 | assert(BTy->isStructureOrClassType() && "Base classes must be classes!" ); |
2680 | |
2681 | const CXXRecordDecl *BRD = BTy->getAsCXXRecordDecl(); |
2682 | assert(BRD && "Base classes must be C++ classes!" ); |
2683 | |
2684 | const CXXBaseObjectRegion *BR = |
2685 | MRMgr.getCXXBaseObjectRegion(BaseClass: BRD, Super: R, /*IsVirtual=*/false); |
2686 | |
2687 | NewB = bindStruct(B: NewB, R: BR, V: *VI); |
2688 | |
2689 | ++VI; |
2690 | } |
2691 | } |
2692 | |
2693 | RecordDecl::field_iterator FI, FE; |
2694 | |
2695 | for (FI = RD->field_begin(), FE = RD->field_end(); FI != FE; ++FI) { |
2696 | |
2697 | if (VI == VE) |
2698 | break; |
2699 | |
2700 | // Skip any unnamed bitfields to stay in sync with the initializers. |
2701 | if (FI->isUnnamedBitField()) |
2702 | continue; |
2703 | |
2704 | QualType FTy = FI->getType(); |
2705 | const FieldRegion* FR = MRMgr.getFieldRegion(fd: *FI, superRegion: R); |
2706 | |
2707 | if (FTy->isArrayType()) |
2708 | NewB = bindArray(B: NewB, R: FR, Init: *VI); |
2709 | else if (FTy->isStructureOrClassType()) |
2710 | NewB = bindStruct(B: NewB, R: FR, V: *VI); |
2711 | else |
2712 | NewB = bind(B: NewB, L: loc::MemRegionVal(FR), V: *VI); |
2713 | ++VI; |
2714 | } |
2715 | |
2716 | // There may be fewer values in the initialize list than the fields of struct. |
2717 | if (FI != FE) { |
2718 | NewB = NewB.addBinding(R, k: BindingKey::Default, |
2719 | V: svalBuilder.makeIntVal(integer: 0, isUnsigned: false)); |
2720 | } |
2721 | |
2722 | return NewB; |
2723 | } |
2724 | |
2725 | RegionBindingsRef |
2726 | RegionStoreManager::bindAggregate(RegionBindingsConstRef B, |
2727 | const TypedRegion *R, |
2728 | SVal Val) { |
2729 | // Remove the old bindings, using 'R' as the root of all regions |
2730 | // we will invalidate. Then add the new binding. |
2731 | return removeSubRegionBindings(B, Top: R).addBinding(R, k: BindingKey::Default, V: Val); |
2732 | } |
2733 | |
2734 | //===----------------------------------------------------------------------===// |
2735 | // State pruning. |
2736 | //===----------------------------------------------------------------------===// |
2737 | |
2738 | namespace { |
2739 | class RemoveDeadBindingsWorker |
2740 | : public ClusterAnalysis<RemoveDeadBindingsWorker> { |
2741 | SmallVector<const SymbolicRegion *, 12> Postponed; |
2742 | SymbolReaper &SymReaper; |
2743 | const StackFrameContext *CurrentLCtx; |
2744 | |
2745 | public: |
2746 | RemoveDeadBindingsWorker(RegionStoreManager &rm, |
2747 | ProgramStateManager &stateMgr, |
2748 | RegionBindingsRef b, SymbolReaper &symReaper, |
2749 | const StackFrameContext *LCtx) |
2750 | : ClusterAnalysis<RemoveDeadBindingsWorker>(rm, stateMgr, b), |
2751 | SymReaper(symReaper), CurrentLCtx(LCtx) {} |
2752 | |
2753 | // Called by ClusterAnalysis. |
2754 | void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C); |
2755 | void VisitCluster(const MemRegion *baseR, const ClusterBindings *C); |
2756 | using ClusterAnalysis<RemoveDeadBindingsWorker>::VisitCluster; |
2757 | |
2758 | using ClusterAnalysis::AddToWorkList; |
2759 | |
2760 | bool AddToWorkList(const MemRegion *R); |
2761 | |
2762 | bool UpdatePostponed(); |
2763 | void VisitBinding(SVal V); |
2764 | }; |
2765 | } |
2766 | |
2767 | bool RemoveDeadBindingsWorker::AddToWorkList(const MemRegion *R) { |
2768 | const MemRegion *BaseR = R->getBaseRegion(); |
2769 | return AddToWorkList(E: WorkListElement(BaseR), C: getCluster(R: BaseR)); |
2770 | } |
2771 | |
2772 | void RemoveDeadBindingsWorker::VisitAddedToCluster(const MemRegion *baseR, |
2773 | const ClusterBindings &C) { |
2774 | |
2775 | if (const VarRegion *VR = dyn_cast<VarRegion>(Val: baseR)) { |
2776 | if (SymReaper.isLive(VR)) |
2777 | AddToWorkList(E: baseR, C: &C); |
2778 | |
2779 | return; |
2780 | } |
2781 | |
2782 | if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Val: baseR)) { |
2783 | if (SymReaper.isLive(sym: SR->getSymbol())) |
2784 | AddToWorkList(E: SR, C: &C); |
2785 | else |
2786 | Postponed.push_back(Elt: SR); |
2787 | |
2788 | return; |
2789 | } |
2790 | |
2791 | if (isa<NonStaticGlobalSpaceRegion>(Val: baseR)) { |
2792 | AddToWorkList(E: baseR, C: &C); |
2793 | return; |
2794 | } |
2795 | |
2796 | // CXXThisRegion in the current or parent location context is live. |
2797 | if (const CXXThisRegion *TR = dyn_cast<CXXThisRegion>(Val: baseR)) { |
2798 | const auto *StackReg = |
2799 | cast<StackArgumentsSpaceRegion>(Val: TR->getSuperRegion()); |
2800 | const StackFrameContext *RegCtx = StackReg->getStackFrame(); |
2801 | if (CurrentLCtx && |
2802 | (RegCtx == CurrentLCtx || RegCtx->isParentOf(LC: CurrentLCtx))) |
2803 | AddToWorkList(E: TR, C: &C); |
2804 | } |
2805 | } |
2806 | |
2807 | void RemoveDeadBindingsWorker::VisitCluster(const MemRegion *baseR, |
2808 | const ClusterBindings *C) { |
2809 | if (!C) |
2810 | return; |
2811 | |
2812 | // Mark the symbol for any SymbolicRegion with live bindings as live itself. |
2813 | // This means we should continue to track that symbol. |
2814 | if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(Val: baseR)) |
2815 | SymReaper.markLive(sym: SymR->getSymbol()); |
2816 | |
2817 | for (const auto &[Key, Val] : *C) { |
2818 | // Element index of a binding key is live. |
2819 | SymReaper.markElementIndicesLive(region: Key.getRegion()); |
2820 | |
2821 | VisitBinding(V: Val); |
2822 | } |
2823 | } |
2824 | |
2825 | void RemoveDeadBindingsWorker::VisitBinding(SVal V) { |
2826 | // Is it a LazyCompoundVal? All referenced regions are live as well. |
2827 | // The LazyCompoundVal itself is not live but should be readable. |
2828 | if (auto LCS = V.getAs<nonloc::LazyCompoundVal>()) { |
2829 | SymReaper.markLazilyCopied(region: LCS->getRegion()); |
2830 | |
2831 | for (SVal V : RM.getInterestingValues(LCV: *LCS)) { |
2832 | if (auto DepLCS = V.getAs<nonloc::LazyCompoundVal>()) |
2833 | SymReaper.markLazilyCopied(region: DepLCS->getRegion()); |
2834 | else |
2835 | VisitBinding(V); |
2836 | } |
2837 | |
2838 | return; |
2839 | } |
2840 | |
2841 | // If V is a region, then add it to the worklist. |
2842 | if (const MemRegion *R = V.getAsRegion()) { |
2843 | AddToWorkList(R); |
2844 | SymReaper.markLive(region: R); |
2845 | |
2846 | // All regions captured by a block are also live. |
2847 | if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(Val: R)) { |
2848 | for (auto Var : BR->referenced_vars()) |
2849 | AddToWorkList(R: Var.getCapturedRegion()); |
2850 | } |
2851 | } |
2852 | |
2853 | |
2854 | // Update the set of live symbols. |
2855 | for (SymbolRef Sym : V.symbols()) |
2856 | SymReaper.markLive(sym: Sym); |
2857 | } |
2858 | |
2859 | bool RemoveDeadBindingsWorker::UpdatePostponed() { |
2860 | // See if any postponed SymbolicRegions are actually live now, after |
2861 | // having done a scan. |
2862 | bool Changed = false; |
2863 | |
2864 | for (const SymbolicRegion *SR : Postponed) { |
2865 | if (SymReaper.isLive(sym: SR->getSymbol())) { |
2866 | Changed |= AddToWorkList(R: SR); |
2867 | SR = nullptr; |
2868 | } |
2869 | } |
2870 | |
2871 | return Changed; |
2872 | } |
2873 | |
2874 | StoreRef RegionStoreManager::removeDeadBindings(Store store, |
2875 | const StackFrameContext *LCtx, |
2876 | SymbolReaper& SymReaper) { |
2877 | RegionBindingsRef B = getRegionBindings(store); |
2878 | RemoveDeadBindingsWorker W(*this, StateMgr, B, SymReaper, LCtx); |
2879 | W.GenerateClusters(); |
2880 | |
2881 | // Enqueue the region roots onto the worklist. |
2882 | for (const MemRegion *Reg : SymReaper.regions()) { |
2883 | W.AddToWorkList(R: Reg); |
2884 | } |
2885 | |
2886 | do W.RunWorkList(); while (W.UpdatePostponed()); |
2887 | |
2888 | // We have now scanned the store, marking reachable regions and symbols |
2889 | // as live. We now remove all the regions that are dead from the store |
2890 | // as well as update DSymbols with the set symbols that are now dead. |
2891 | for (const MemRegion *Base : llvm::make_first_range(c&: B)) { |
2892 | // If the cluster has been visited, we know the region has been marked. |
2893 | // Otherwise, remove the dead entry. |
2894 | if (!W.isVisited(R: Base)) |
2895 | B = B.remove(K: Base); |
2896 | } |
2897 | |
2898 | return StoreRef(B.asStore(), *this); |
2899 | } |
2900 | |
2901 | //===----------------------------------------------------------------------===// |
2902 | // Utility methods. |
2903 | //===----------------------------------------------------------------------===// |
2904 | |
2905 | void RegionStoreManager::printJson(raw_ostream &Out, Store S, const char *NL, |
2906 | unsigned int Space, bool IsDot) const { |
2907 | RegionBindingsRef Bindings = getRegionBindings(store: S); |
2908 | |
2909 | Indent(Out, Space, IsDot) << "\"store\": " ; |
2910 | |
2911 | if (Bindings.isEmpty()) { |
2912 | Out << "null," << NL; |
2913 | return; |
2914 | } |
2915 | |
2916 | Out << "{ \"pointer\": \"" << Bindings.asStore() << "\", \"items\": [" << NL; |
2917 | Bindings.printJson(Out, NL, Space: Space + 1, IsDot); |
2918 | Indent(Out, Space, IsDot) << "]}," << NL; |
2919 | } |
2920 | |