1 | //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines RangeConstraintManager, a class that tracks simple |
10 | // equality and inequality constraints on symbolic values of ProgramState. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/Basic/JsonSupport.h" |
15 | #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" |
16 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" |
17 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
18 | #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h" |
19 | #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" |
20 | #include "llvm/ADT/FoldingSet.h" |
21 | #include "llvm/ADT/ImmutableSet.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/SmallSet.h" |
24 | #include "llvm/ADT/StringExtras.h" |
25 | #include "llvm/Support/Compiler.h" |
26 | #include "llvm/Support/raw_ostream.h" |
27 | #include <algorithm> |
28 | #include <iterator> |
29 | #include <optional> |
30 | |
31 | using namespace clang; |
32 | using namespace ento; |
33 | |
34 | // This class can be extended with other tables which will help to reason |
35 | // about ranges more precisely. |
36 | class OperatorRelationsTable { |
37 | static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE && |
38 | BO_GE < BO_EQ && BO_EQ < BO_NE, |
39 | "This class relies on operators order. Rework it otherwise." ); |
40 | |
41 | public: |
42 | enum TriStateKind { |
43 | False = 0, |
44 | True, |
45 | Unknown, |
46 | }; |
47 | |
48 | private: |
49 | // CmpOpTable holds states which represent the corresponding range for |
50 | // branching an exploded graph. We can reason about the branch if there is |
51 | // a previously known fact of the existence of a comparison expression with |
52 | // operands used in the current expression. |
53 | // E.g. assuming (x < y) is true that means (x != y) is surely true. |
54 | // if (x previous_operation y) // < | != | > |
55 | // if (x operation y) // != | > | < |
56 | // tristate // True | Unknown | False |
57 | // |
58 | // CmpOpTable represents next: |
59 | // __|< |> |<=|>=|==|!=|UnknownX2| |
60 | // < |1 |0 |* |0 |0 |* |1 | |
61 | // > |0 |1 |0 |* |0 |* |1 | |
62 | // <=|1 |0 |1 |* |1 |* |0 | |
63 | // >=|0 |1 |* |1 |1 |* |0 | |
64 | // ==|0 |0 |* |* |1 |0 |1 | |
65 | // !=|1 |1 |* |* |0 |1 |0 | |
66 | // |
67 | // Columns stands for a previous operator. |
68 | // Rows stands for a current operator. |
69 | // Each row has exactly two `Unknown` cases. |
70 | // UnknownX2 means that both `Unknown` previous operators are met in code, |
71 | // and there is a special column for that, for example: |
72 | // if (x >= y) |
73 | // if (x != y) |
74 | // if (x <= y) |
75 | // False only |
76 | static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1; |
77 | const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = { |
78 | // < > <= >= == != UnknownX2 |
79 | {True, False, Unknown, False, False, Unknown, True}, // < |
80 | {False, True, False, Unknown, False, Unknown, True}, // > |
81 | {True, False, True, Unknown, True, Unknown, False}, // <= |
82 | {False, True, Unknown, True, True, Unknown, False}, // >= |
83 | {False, False, Unknown, Unknown, True, False, True}, // == |
84 | {True, True, Unknown, Unknown, False, True, False}, // != |
85 | }; |
86 | |
87 | static size_t getIndexFromOp(BinaryOperatorKind OP) { |
88 | return static_cast<size_t>(OP - BO_LT); |
89 | } |
90 | |
91 | public: |
92 | constexpr size_t getCmpOpCount() const { return CmpOpCount; } |
93 | |
94 | static BinaryOperatorKind getOpFromIndex(size_t Index) { |
95 | return static_cast<BinaryOperatorKind>(Index + BO_LT); |
96 | } |
97 | |
98 | TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP, |
99 | BinaryOperatorKind QueriedOP) const { |
100 | return CmpOpTable[getIndexFromOp(OP: CurrentOP)][getIndexFromOp(OP: QueriedOP)]; |
101 | } |
102 | |
103 | TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const { |
104 | return CmpOpTable[getIndexFromOp(OP: CurrentOP)][CmpOpCount]; |
105 | } |
106 | }; |
107 | |
108 | //===----------------------------------------------------------------------===// |
109 | // RangeSet implementation |
110 | //===----------------------------------------------------------------------===// |
111 | |
112 | RangeSet::ContainerType RangeSet::Factory::EmptySet{}; |
113 | |
114 | RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) { |
115 | ContainerType Result; |
116 | Result.reserve(N: LHS.size() + RHS.size()); |
117 | std::merge(first1: LHS.begin(), last1: LHS.end(), first2: RHS.begin(), last2: RHS.end(), |
118 | result: std::back_inserter(x&: Result)); |
119 | return makePersistent(From: std::move(Result)); |
120 | } |
121 | |
122 | RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) { |
123 | ContainerType Result; |
124 | Result.reserve(N: Original.size() + 1); |
125 | |
126 | const_iterator Lower = llvm::lower_bound(Range&: Original, Value&: Element); |
127 | Result.insert(I: Result.end(), From: Original.begin(), To: Lower); |
128 | Result.push_back(Elt: Element); |
129 | Result.insert(I: Result.end(), From: Lower, To: Original.end()); |
130 | |
131 | return makePersistent(From: std::move(Result)); |
132 | } |
133 | |
134 | RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) { |
135 | return add(Original, Element: Range(Point)); |
136 | } |
137 | |
138 | RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) { |
139 | ContainerType Result = unite(LHS: *LHS.Impl, RHS: *RHS.Impl); |
140 | return makePersistent(From: std::move(Result)); |
141 | } |
142 | |
143 | RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) { |
144 | ContainerType Result; |
145 | Result.push_back(Elt: R); |
146 | Result = unite(LHS: *Original.Impl, RHS: Result); |
147 | return makePersistent(From: std::move(Result)); |
148 | } |
149 | |
150 | RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) { |
151 | return unite(Original, R: Range(ValueFactory.getValue(X: Point))); |
152 | } |
153 | |
154 | RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From, |
155 | llvm::APSInt To) { |
156 | return unite(Original, |
157 | R: Range(ValueFactory.getValue(X: From), ValueFactory.getValue(X: To))); |
158 | } |
159 | |
160 | template <typename T> |
161 | void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) { |
162 | std::swap(First, Second); |
163 | std::swap(FirstEnd, SecondEnd); |
164 | } |
165 | |
166 | RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS, |
167 | const ContainerType &RHS) { |
168 | if (LHS.empty()) |
169 | return RHS; |
170 | if (RHS.empty()) |
171 | return LHS; |
172 | |
173 | using llvm::APSInt; |
174 | using iterator = ContainerType::const_iterator; |
175 | |
176 | iterator First = LHS.begin(); |
177 | iterator FirstEnd = LHS.end(); |
178 | iterator Second = RHS.begin(); |
179 | iterator SecondEnd = RHS.end(); |
180 | APSIntType Ty = APSIntType(First->From()); |
181 | const APSInt Min = Ty.getMinValue(); |
182 | |
183 | // Handle a corner case first when both range sets start from MIN. |
184 | // This helps to avoid complicated conditions below. Specifically, this |
185 | // particular check for `MIN` is not needed in the loop below every time |
186 | // when we do `Second->From() - One` operation. |
187 | if (Min == First->From() && Min == Second->From()) { |
188 | if (First->To() > Second->To()) { |
189 | // [ First ]---> |
190 | // [ Second ]-----> |
191 | // MIN^ |
192 | // The Second range is entirely inside the First one. |
193 | |
194 | // Check if Second is the last in its RangeSet. |
195 | if (++Second == SecondEnd) |
196 | // [ First ]--[ First + 1 ]---> |
197 | // [ Second ]---------------------> |
198 | // MIN^ |
199 | // The Union is equal to First's RangeSet. |
200 | return LHS; |
201 | } else { |
202 | // case 1: [ First ]-----> |
203 | // case 2: [ First ]---> |
204 | // [ Second ]---> |
205 | // MIN^ |
206 | // The First range is entirely inside or equal to the Second one. |
207 | |
208 | // Check if First is the last in its RangeSet. |
209 | if (++First == FirstEnd) |
210 | // [ First ]-----------------------> |
211 | // [ Second ]--[ Second + 1 ]----> |
212 | // MIN^ |
213 | // The Union is equal to Second's RangeSet. |
214 | return RHS; |
215 | } |
216 | } |
217 | |
218 | const APSInt One = Ty.getValue(RawValue: 1); |
219 | ContainerType Result; |
220 | |
221 | // This is called when there are no ranges left in one of the ranges. |
222 | // Append the rest of the ranges from another range set to the Result |
223 | // and return with that. |
224 | const auto AppendTheRest = [&Result](iterator I, iterator E) { |
225 | Result.append(in_start: I, in_end: E); |
226 | return Result; |
227 | }; |
228 | |
229 | while (true) { |
230 | // We want to keep the following invariant at all times: |
231 | // ---[ First ------> |
232 | // -----[ Second ---> |
233 | if (First->From() > Second->From()) |
234 | swapIterators(First, FirstEnd, Second, SecondEnd); |
235 | |
236 | // The Union definitely starts with First->From(). |
237 | // ----------[ First ------> |
238 | // ------------[ Second ---> |
239 | // ----------[ Union ------> |
240 | // UnionStart^ |
241 | const llvm::APSInt &UnionStart = First->From(); |
242 | |
243 | // Loop where the invariant holds. |
244 | while (true) { |
245 | // Skip all enclosed ranges. |
246 | // ---[ First ]---> |
247 | // -----[ Second ]--[ Second + 1 ]--[ Second + N ]-----> |
248 | while (First->To() >= Second->To()) { |
249 | // Check if Second is the last in its RangeSet. |
250 | if (++Second == SecondEnd) { |
251 | // Append the Union. |
252 | // ---[ Union ]---> |
253 | // -----[ Second ]-----> |
254 | // --------[ First ]---> |
255 | // UnionEnd^ |
256 | Result.emplace_back(Args: UnionStart, Args: First->To()); |
257 | // ---[ Union ]-----------------> |
258 | // --------------[ First + 1]---> |
259 | // Append all remaining ranges from the First's RangeSet. |
260 | return AppendTheRest(++First, FirstEnd); |
261 | } |
262 | } |
263 | |
264 | // Check if First and Second are disjoint. It means that we find |
265 | // the end of the Union. Exit the loop and append the Union. |
266 | // ---[ First ]=-------------> |
267 | // ------------=[ Second ]---> |
268 | // ----MinusOne^ |
269 | if (First->To() < Second->From() - One) |
270 | break; |
271 | |
272 | // First is entirely inside the Union. Go next. |
273 | // ---[ Union -----------> |
274 | // ---- [ First ]--------> |
275 | // -------[ Second ]-----> |
276 | // Check if First is the last in its RangeSet. |
277 | if (++First == FirstEnd) { |
278 | // Append the Union. |
279 | // ---[ Union ]---> |
280 | // -----[ First ]-------> |
281 | // --------[ Second ]---> |
282 | // UnionEnd^ |
283 | Result.emplace_back(Args: UnionStart, Args: Second->To()); |
284 | // ---[ Union ]------------------> |
285 | // --------------[ Second + 1]---> |
286 | // Append all remaining ranges from the Second's RangeSet. |
287 | return AppendTheRest(++Second, SecondEnd); |
288 | } |
289 | |
290 | // We know that we are at one of the two cases: |
291 | // case 1: --[ First ]---------> |
292 | // case 2: ----[ First ]-------> |
293 | // --------[ Second ]----------> |
294 | // In both cases First starts after Second->From(). |
295 | // Make sure that the loop invariant holds. |
296 | swapIterators(First, FirstEnd, Second, SecondEnd); |
297 | } |
298 | |
299 | // Here First and Second are disjoint. |
300 | // Append the Union. |
301 | // ---[ Union ]---------------> |
302 | // -----------------[ Second ]---> |
303 | // ------[ First ]---------------> |
304 | // UnionEnd^ |
305 | Result.emplace_back(Args: UnionStart, Args: First->To()); |
306 | |
307 | // Check if First is the last in its RangeSet. |
308 | if (++First == FirstEnd) |
309 | // ---[ Union ]---------------> |
310 | // --------------[ Second ]---> |
311 | // Append all remaining ranges from the Second's RangeSet. |
312 | return AppendTheRest(Second, SecondEnd); |
313 | } |
314 | |
315 | llvm_unreachable("Normally, we should not reach here" ); |
316 | } |
317 | |
318 | RangeSet RangeSet::Factory::getRangeSet(Range From) { |
319 | ContainerType Result; |
320 | Result.push_back(Elt: From); |
321 | return makePersistent(From: std::move(Result)); |
322 | } |
323 | |
324 | RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) { |
325 | llvm::FoldingSetNodeID ID; |
326 | void *InsertPos; |
327 | |
328 | From.Profile(ID); |
329 | ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos); |
330 | |
331 | if (!Result) { |
332 | // It is cheaper to fully construct the resulting range on stack |
333 | // and move it to the freshly allocated buffer if we don't have |
334 | // a set like this already. |
335 | Result = construct(From: std::move(From)); |
336 | Cache.InsertNode(N: Result, InsertPos); |
337 | } |
338 | |
339 | return Result; |
340 | } |
341 | |
342 | RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) { |
343 | void *Buffer = Arena.Allocate(); |
344 | return new (Buffer) ContainerType(std::move(From)); |
345 | } |
346 | |
347 | const llvm::APSInt &RangeSet::getMinValue() const { |
348 | assert(!isEmpty()); |
349 | return begin()->From(); |
350 | } |
351 | |
352 | const llvm::APSInt &RangeSet::getMaxValue() const { |
353 | assert(!isEmpty()); |
354 | return std::prev(x: end())->To(); |
355 | } |
356 | |
357 | bool clang::ento::RangeSet::isUnsigned() const { |
358 | assert(!isEmpty()); |
359 | return begin()->From().isUnsigned(); |
360 | } |
361 | |
362 | uint32_t clang::ento::RangeSet::getBitWidth() const { |
363 | assert(!isEmpty()); |
364 | return begin()->From().getBitWidth(); |
365 | } |
366 | |
367 | APSIntType clang::ento::RangeSet::getAPSIntType() const { |
368 | assert(!isEmpty()); |
369 | return APSIntType(begin()->From()); |
370 | } |
371 | |
372 | bool RangeSet::containsImpl(llvm::APSInt &Point) const { |
373 | if (isEmpty() || !pin(Point)) |
374 | return false; |
375 | |
376 | Range Dummy(Point); |
377 | const_iterator It = llvm::upper_bound(Range: *this, Value&: Dummy); |
378 | if (It == begin()) |
379 | return false; |
380 | |
381 | return std::prev(x: It)->Includes(Point); |
382 | } |
383 | |
384 | bool RangeSet::pin(llvm::APSInt &Point) const { |
385 | APSIntType Type(getMinValue()); |
386 | if (Type.testInRange(Val: Point, AllowMixedSign: true) != APSIntType::RTR_Within) |
387 | return false; |
388 | |
389 | Type.apply(Value&: Point); |
390 | return true; |
391 | } |
392 | |
393 | bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { |
394 | // This function has nine cases, the cartesian product of range-testing |
395 | // both the upper and lower bounds against the symbol's type. |
396 | // Each case requires a different pinning operation. |
397 | // The function returns false if the described range is entirely outside |
398 | // the range of values for the associated symbol. |
399 | APSIntType Type(getMinValue()); |
400 | APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Val: Lower, AllowMixedSign: true); |
401 | APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Val: Upper, AllowMixedSign: true); |
402 | |
403 | switch (LowerTest) { |
404 | case APSIntType::RTR_Below: |
405 | switch (UpperTest) { |
406 | case APSIntType::RTR_Below: |
407 | // The entire range is outside the symbol's set of possible values. |
408 | // If this is a conventionally-ordered range, the state is infeasible. |
409 | if (Lower <= Upper) |
410 | return false; |
411 | |
412 | // However, if the range wraps around, it spans all possible values. |
413 | Lower = Type.getMinValue(); |
414 | Upper = Type.getMaxValue(); |
415 | break; |
416 | case APSIntType::RTR_Within: |
417 | // The range starts below what's possible but ends within it. Pin. |
418 | Lower = Type.getMinValue(); |
419 | Type.apply(Value&: Upper); |
420 | break; |
421 | case APSIntType::RTR_Above: |
422 | // The range spans all possible values for the symbol. Pin. |
423 | Lower = Type.getMinValue(); |
424 | Upper = Type.getMaxValue(); |
425 | break; |
426 | } |
427 | break; |
428 | case APSIntType::RTR_Within: |
429 | switch (UpperTest) { |
430 | case APSIntType::RTR_Below: |
431 | // The range wraps around, but all lower values are not possible. |
432 | Type.apply(Value&: Lower); |
433 | Upper = Type.getMaxValue(); |
434 | break; |
435 | case APSIntType::RTR_Within: |
436 | // The range may or may not wrap around, but both limits are valid. |
437 | Type.apply(Value&: Lower); |
438 | Type.apply(Value&: Upper); |
439 | break; |
440 | case APSIntType::RTR_Above: |
441 | // The range starts within what's possible but ends above it. Pin. |
442 | Type.apply(Value&: Lower); |
443 | Upper = Type.getMaxValue(); |
444 | break; |
445 | } |
446 | break; |
447 | case APSIntType::RTR_Above: |
448 | switch (UpperTest) { |
449 | case APSIntType::RTR_Below: |
450 | // The range wraps but is outside the symbol's set of possible values. |
451 | return false; |
452 | case APSIntType::RTR_Within: |
453 | // The range starts above what's possible but ends within it (wrap). |
454 | Lower = Type.getMinValue(); |
455 | Type.apply(Value&: Upper); |
456 | break; |
457 | case APSIntType::RTR_Above: |
458 | // The entire range is outside the symbol's set of possible values. |
459 | // If this is a conventionally-ordered range, the state is infeasible. |
460 | if (Lower <= Upper) |
461 | return false; |
462 | |
463 | // However, if the range wraps around, it spans all possible values. |
464 | Lower = Type.getMinValue(); |
465 | Upper = Type.getMaxValue(); |
466 | break; |
467 | } |
468 | break; |
469 | } |
470 | |
471 | return true; |
472 | } |
473 | |
474 | RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower, |
475 | llvm::APSInt Upper) { |
476 | if (What.isEmpty() || !What.pin(Lower, Upper)) |
477 | return getEmptySet(); |
478 | |
479 | ContainerType DummyContainer; |
480 | |
481 | if (Lower <= Upper) { |
482 | // [Lower, Upper] is a regular range. |
483 | // |
484 | // Shortcut: check that there is even a possibility of the intersection |
485 | // by checking the two following situations: |
486 | // |
487 | // <---[ What ]---[------]------> |
488 | // Lower Upper |
489 | // -or- |
490 | // <----[------]----[ What ]----> |
491 | // Lower Upper |
492 | if (What.getMaxValue() < Lower || Upper < What.getMinValue()) |
493 | return getEmptySet(); |
494 | |
495 | DummyContainer.push_back( |
496 | Elt: Range(ValueFactory.getValue(X: Lower), ValueFactory.getValue(X: Upper))); |
497 | } else { |
498 | // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX] |
499 | // |
500 | // Shortcut: check that there is even a possibility of the intersection |
501 | // by checking the following situation: |
502 | // |
503 | // <------]---[ What ]---[------> |
504 | // Upper Lower |
505 | if (What.getMaxValue() < Lower && Upper < What.getMinValue()) |
506 | return getEmptySet(); |
507 | |
508 | DummyContainer.push_back( |
509 | Elt: Range(ValueFactory.getMinValue(v: Upper), ValueFactory.getValue(X: Upper))); |
510 | DummyContainer.push_back( |
511 | Elt: Range(ValueFactory.getValue(X: Lower), ValueFactory.getMaxValue(v: Lower))); |
512 | } |
513 | |
514 | return intersect(LHS: *What.Impl, RHS: DummyContainer); |
515 | } |
516 | |
517 | RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS, |
518 | const RangeSet::ContainerType &RHS) { |
519 | ContainerType Result; |
520 | Result.reserve(N: std::max(a: LHS.size(), b: RHS.size())); |
521 | |
522 | const_iterator First = LHS.begin(), Second = RHS.begin(), |
523 | FirstEnd = LHS.end(), SecondEnd = RHS.end(); |
524 | |
525 | // If we ran out of ranges in one set, but not in the other, |
526 | // it means that those elements are definitely not in the |
527 | // intersection. |
528 | while (First != FirstEnd && Second != SecondEnd) { |
529 | // We want to keep the following invariant at all times: |
530 | // |
531 | // ----[ First ----------------------> |
532 | // --------[ Second -----------------> |
533 | if (Second->From() < First->From()) |
534 | swapIterators(First, FirstEnd, Second, SecondEnd); |
535 | |
536 | // Loop where the invariant holds: |
537 | do { |
538 | // Check for the following situation: |
539 | // |
540 | // ----[ First ]---------------------> |
541 | // ---------------[ Second ]---------> |
542 | // |
543 | // which means that... |
544 | if (Second->From() > First->To()) { |
545 | // ...First is not in the intersection. |
546 | // |
547 | // We should move on to the next range after First and break out of the |
548 | // loop because the invariant might not be true. |
549 | ++First; |
550 | break; |
551 | } |
552 | |
553 | // We have a guaranteed intersection at this point! |
554 | // And this is the current situation: |
555 | // |
556 | // ----[ First ]-----------------> |
557 | // -------[ Second ------------------> |
558 | // |
559 | // Additionally, it definitely starts with Second->From(). |
560 | const llvm::APSInt &IntersectionStart = Second->From(); |
561 | |
562 | // It is important to know which of the two ranges' ends |
563 | // is greater. That "longer" range might have some other |
564 | // intersections, while the "shorter" range might not. |
565 | if (Second->To() > First->To()) { |
566 | // Here we make a decision to keep First as the "longer" |
567 | // range. |
568 | swapIterators(First, FirstEnd, Second, SecondEnd); |
569 | } |
570 | |
571 | // At this point, we have the following situation: |
572 | // |
573 | // ---- First ]--------------------> |
574 | // ---- Second ]--[ Second+1 ----------> |
575 | // |
576 | // We don't know the relationship between First->From and |
577 | // Second->From and we don't know whether Second+1 intersects |
578 | // with First. |
579 | // |
580 | // However, we know that [IntersectionStart, Second->To] is |
581 | // a part of the intersection... |
582 | Result.push_back(Elt: Range(IntersectionStart, Second->To())); |
583 | ++Second; |
584 | // ...and that the invariant will hold for a valid Second+1 |
585 | // because First->From <= Second->To < (Second+1)->From. |
586 | } while (Second != SecondEnd); |
587 | } |
588 | |
589 | if (Result.empty()) |
590 | return getEmptySet(); |
591 | |
592 | return makePersistent(From: std::move(Result)); |
593 | } |
594 | |
595 | RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) { |
596 | // Shortcut: let's see if the intersection is even possible. |
597 | if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() || |
598 | RHS.getMaxValue() < LHS.getMinValue()) |
599 | return getEmptySet(); |
600 | |
601 | return intersect(LHS: *LHS.Impl, RHS: *RHS.Impl); |
602 | } |
603 | |
604 | RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) { |
605 | if (LHS.containsImpl(Point)) |
606 | return getRangeSet(Origin: ValueFactory.getValue(X: Point)); |
607 | |
608 | return getEmptySet(); |
609 | } |
610 | |
611 | RangeSet RangeSet::Factory::negate(RangeSet What) { |
612 | if (What.isEmpty()) |
613 | return getEmptySet(); |
614 | |
615 | const llvm::APSInt SampleValue = What.getMinValue(); |
616 | const llvm::APSInt &MIN = ValueFactory.getMinValue(v: SampleValue); |
617 | const llvm::APSInt &MAX = ValueFactory.getMaxValue(v: SampleValue); |
618 | |
619 | ContainerType Result; |
620 | Result.reserve(N: What.size() + (SampleValue == MIN)); |
621 | |
622 | // Handle a special case for MIN value. |
623 | const_iterator It = What.begin(); |
624 | const_iterator End = What.end(); |
625 | |
626 | const llvm::APSInt &From = It->From(); |
627 | const llvm::APSInt &To = It->To(); |
628 | |
629 | if (From == MIN) { |
630 | // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX]. |
631 | if (To == MAX) { |
632 | return What; |
633 | } |
634 | |
635 | const_iterator Last = std::prev(x: End); |
636 | |
637 | // Try to find and unite the following ranges: |
638 | // [MIN, MIN] & [MIN + 1, N] => [MIN, N]. |
639 | if (Last->To() == MAX) { |
640 | // It means that in the original range we have ranges |
641 | // [MIN, A], ... , [B, MAX] |
642 | // And the result should be [MIN, -B], ..., [-A, MAX] |
643 | Result.emplace_back(Args: MIN, Args: ValueFactory.getValue(X: -Last->From())); |
644 | // We already negated Last, so we can skip it. |
645 | End = Last; |
646 | } else { |
647 | // Add a separate range for the lowest value. |
648 | Result.emplace_back(Args: MIN, Args: MIN); |
649 | } |
650 | |
651 | // Skip adding the second range in case when [From, To] are [MIN, MIN]. |
652 | if (To != MIN) { |
653 | Result.emplace_back(Args: ValueFactory.getValue(X: -To), Args: MAX); |
654 | } |
655 | |
656 | // Skip the first range in the loop. |
657 | ++It; |
658 | } |
659 | |
660 | // Negate all other ranges. |
661 | for (; It != End; ++It) { |
662 | // Negate int values. |
663 | const llvm::APSInt &NewFrom = ValueFactory.getValue(X: -It->To()); |
664 | const llvm::APSInt &NewTo = ValueFactory.getValue(X: -It->From()); |
665 | |
666 | // Add a negated range. |
667 | Result.emplace_back(Args: NewFrom, Args: NewTo); |
668 | } |
669 | |
670 | llvm::sort(C&: Result); |
671 | return makePersistent(From: std::move(Result)); |
672 | } |
673 | |
674 | // Convert range set to the given integral type using truncation and promotion. |
675 | // This works similar to APSIntType::apply function but for the range set. |
676 | RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) { |
677 | // Set is empty or NOOP (aka cast to the same type). |
678 | if (What.isEmpty() || What.getAPSIntType() == Ty) |
679 | return What; |
680 | |
681 | const bool IsConversion = What.isUnsigned() != Ty.isUnsigned(); |
682 | const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth(); |
683 | const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth(); |
684 | |
685 | if (IsTruncation) |
686 | return makePersistent(From: truncateTo(What, Ty)); |
687 | |
688 | // Here we handle 2 cases: |
689 | // - IsConversion && !IsPromotion. |
690 | // In this case we handle changing a sign with same bitwidth: char -> uchar, |
691 | // uint -> int. Here we convert negatives to positives and positives which |
692 | // is out of range to negatives. We use convertTo function for that. |
693 | // - IsConversion && IsPromotion && !What.isUnsigned(). |
694 | // In this case we handle changing a sign from signeds to unsigneds with |
695 | // higher bitwidth: char -> uint, int-> uint64. The point is that we also |
696 | // need convert negatives to positives and use convertTo function as well. |
697 | // For example, we don't need such a convertion when converting unsigned to |
698 | // signed with higher bitwidth, because all the values of unsigned is valid |
699 | // for the such signed. |
700 | if (IsConversion && (!IsPromotion || !What.isUnsigned())) |
701 | return makePersistent(From: convertTo(What, Ty)); |
702 | |
703 | assert(IsPromotion && "Only promotion operation from unsigneds left." ); |
704 | return makePersistent(From: promoteTo(What, Ty)); |
705 | } |
706 | |
707 | RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) { |
708 | assert(T->isIntegralOrEnumerationType() && "T shall be an integral type." ); |
709 | return castTo(What, Ty: ValueFactory.getAPSIntType(T)); |
710 | } |
711 | |
712 | RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What, |
713 | APSIntType Ty) { |
714 | using llvm::APInt; |
715 | using llvm::APSInt; |
716 | ContainerType Result; |
717 | ContainerType Dummy; |
718 | // CastRangeSize is an amount of all possible values of cast type. |
719 | // Example: `char` has 256 values; `short` has 65536 values. |
720 | // But in fact we use `amount of values` - 1, because |
721 | // we can't keep `amount of values of UINT64` inside uint64_t. |
722 | // E.g. 256 is an amount of all possible values of `char` and we can't keep |
723 | // it inside `char`. |
724 | // And it's OK, it's enough to do correct calculations. |
725 | uint64_t CastRangeSize = APInt::getMaxValue(numBits: Ty.getBitWidth()).getZExtValue(); |
726 | for (const Range &R : What) { |
727 | // Get bounds of the given range. |
728 | APSInt FromInt = R.From(); |
729 | APSInt ToInt = R.To(); |
730 | // CurrentRangeSize is an amount of all possible values of the current |
731 | // range minus one. |
732 | uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue(); |
733 | // This is an optimization for a specific case when this Range covers |
734 | // the whole range of the target type. |
735 | Dummy.clear(); |
736 | if (CurrentRangeSize >= CastRangeSize) { |
737 | Dummy.emplace_back(Args: ValueFactory.getMinValue(T: Ty), |
738 | Args: ValueFactory.getMaxValue(T: Ty)); |
739 | Result = std::move(Dummy); |
740 | break; |
741 | } |
742 | // Cast the bounds. |
743 | Ty.apply(Value&: FromInt); |
744 | Ty.apply(Value&: ToInt); |
745 | const APSInt &PersistentFrom = ValueFactory.getValue(X: FromInt); |
746 | const APSInt &PersistentTo = ValueFactory.getValue(X: ToInt); |
747 | if (FromInt > ToInt) { |
748 | Dummy.emplace_back(Args: ValueFactory.getMinValue(T: Ty), Args: PersistentTo); |
749 | Dummy.emplace_back(Args: PersistentFrom, Args: ValueFactory.getMaxValue(T: Ty)); |
750 | } else |
751 | Dummy.emplace_back(Args: PersistentFrom, Args: PersistentTo); |
752 | // Every range retrieved after truncation potentialy has garbage values. |
753 | // So, we have to unite every next range with the previouses. |
754 | Result = unite(LHS: Result, RHS: Dummy); |
755 | } |
756 | |
757 | return Result; |
758 | } |
759 | |
760 | // Divide the convertion into two phases (presented as loops here). |
761 | // First phase(loop) works when casted values go in ascending order. |
762 | // E.g. char{1,3,5,127} -> uint{1,3,5,127} |
763 | // Interrupt the first phase and go to second one when casted values start |
764 | // go in descending order. That means that we crossed over the middle of |
765 | // the type value set (aka 0 for signeds and MAX/2+1 for unsigneds). |
766 | // For instance: |
767 | // 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1} |
768 | // Here we put {1,3,5} to one array and {-128, -1} to another |
769 | // 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3} |
770 | // Here we put {128,129,255} to one array and {0,1,3} to another. |
771 | // After that we unite both arrays. |
772 | // NOTE: We don't just concatenate the arrays, because they may have |
773 | // adjacent ranges, e.g.: |
774 | // 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) -> |
775 | // unite -> uchar(0, 255) |
776 | // 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) -> |
777 | // unite -> uchar(-2, 1) |
778 | RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What, |
779 | APSIntType Ty) { |
780 | using llvm::APInt; |
781 | using llvm::APSInt; |
782 | using Bounds = std::pair<const APSInt &, const APSInt &>; |
783 | ContainerType AscendArray; |
784 | ContainerType DescendArray; |
785 | auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds { |
786 | // Get bounds of the given range. |
787 | APSInt FromInt = R.From(); |
788 | APSInt ToInt = R.To(); |
789 | // Cast the bounds. |
790 | Ty.apply(Value&: FromInt); |
791 | Ty.apply(Value&: ToInt); |
792 | return {VF.getValue(X: FromInt), VF.getValue(X: ToInt)}; |
793 | }; |
794 | // Phase 1. Fill the first array. |
795 | APSInt LastConvertedInt = Ty.getMinValue(); |
796 | const auto *It = What.begin(); |
797 | const auto *E = What.end(); |
798 | while (It != E) { |
799 | Bounds NewBounds = CastRange(*(It++)); |
800 | // If values stop going acsending order, go to the second phase(loop). |
801 | if (NewBounds.first < LastConvertedInt) { |
802 | DescendArray.emplace_back(Args: NewBounds.first, Args: NewBounds.second); |
803 | break; |
804 | } |
805 | // If the range contains a midpoint, then split the range. |
806 | // E.g. char(-5, 5) -> uchar(251, 5) |
807 | // Here we shall add a range (251, 255) to the first array and (0, 5) to the |
808 | // second one. |
809 | if (NewBounds.first > NewBounds.second) { |
810 | DescendArray.emplace_back(Args: ValueFactory.getMinValue(T: Ty), Args: NewBounds.second); |
811 | AscendArray.emplace_back(Args: NewBounds.first, Args: ValueFactory.getMaxValue(T: Ty)); |
812 | } else |
813 | // Values are going acsending order. |
814 | AscendArray.emplace_back(Args: NewBounds.first, Args: NewBounds.second); |
815 | LastConvertedInt = NewBounds.first; |
816 | } |
817 | // Phase 2. Fill the second array. |
818 | while (It != E) { |
819 | Bounds NewBounds = CastRange(*(It++)); |
820 | DescendArray.emplace_back(Args: NewBounds.first, Args: NewBounds.second); |
821 | } |
822 | // Unite both arrays. |
823 | return unite(LHS: AscendArray, RHS: DescendArray); |
824 | } |
825 | |
826 | /// Promotion from unsigneds to signeds/unsigneds left. |
827 | RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What, |
828 | APSIntType Ty) { |
829 | ContainerType Result; |
830 | // We definitely know the size of the result set. |
831 | Result.reserve(N: What.size()); |
832 | |
833 | // Each unsigned value fits every larger type without any changes, |
834 | // whether the larger type is signed or unsigned. So just promote and push |
835 | // back each range one by one. |
836 | for (const Range &R : What) { |
837 | // Get bounds of the given range. |
838 | llvm::APSInt FromInt = R.From(); |
839 | llvm::APSInt ToInt = R.To(); |
840 | // Cast the bounds. |
841 | Ty.apply(Value&: FromInt); |
842 | Ty.apply(Value&: ToInt); |
843 | Result.emplace_back(Args: ValueFactory.getValue(X: FromInt), |
844 | Args: ValueFactory.getValue(X: ToInt)); |
845 | } |
846 | return Result; |
847 | } |
848 | |
849 | RangeSet RangeSet::Factory::deletePoint(RangeSet From, |
850 | const llvm::APSInt &Point) { |
851 | if (!From.contains(Point)) |
852 | return From; |
853 | |
854 | llvm::APSInt Upper = Point; |
855 | llvm::APSInt Lower = Point; |
856 | |
857 | ++Upper; |
858 | --Lower; |
859 | |
860 | // Notice that the lower bound is greater than the upper bound. |
861 | return intersect(What: From, Lower: Upper, Upper: Lower); |
862 | } |
863 | |
864 | LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const { |
865 | OS << '[' << toString(I: From(), Radix: 10) << ", " << toString(I: To(), Radix: 10) << ']'; |
866 | } |
867 | LLVM_DUMP_METHOD void Range::dump() const { dump(OS&: llvm::errs()); } |
868 | |
869 | LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const { |
870 | OS << "{ " ; |
871 | llvm::interleaveComma(c: *this, os&: OS, each_fn: [&OS](const Range &R) { R.dump(OS); }); |
872 | OS << " }" ; |
873 | } |
874 | LLVM_DUMP_METHOD void RangeSet::dump() const { dump(OS&: llvm::errs()); } |
875 | |
876 | REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef) |
877 | |
878 | namespace { |
879 | class EquivalenceClass; |
880 | } // end anonymous namespace |
881 | |
882 | REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass) |
883 | REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet) |
884 | REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet) |
885 | |
886 | REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass) |
887 | REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet) |
888 | |
889 | namespace { |
890 | /// This class encapsulates a set of symbols equal to each other. |
891 | /// |
892 | /// The main idea of the approach requiring such classes is in narrowing |
893 | /// and sharing constraints between symbols within the class. Also we can |
894 | /// conclude that there is no practical need in storing constraints for |
895 | /// every member of the class separately. |
896 | /// |
897 | /// Main terminology: |
898 | /// |
899 | /// * "Equivalence class" is an object of this class, which can be efficiently |
900 | /// compared to other classes. It represents the whole class without |
901 | /// storing the actual in it. The members of the class however can be |
902 | /// retrieved from the state. |
903 | /// |
904 | /// * "Class members" are the symbols corresponding to the class. This means |
905 | /// that A == B for every member symbols A and B from the class. Members of |
906 | /// each class are stored in the state. |
907 | /// |
908 | /// * "Trivial class" is a class that has and ever had only one same symbol. |
909 | /// |
910 | /// * "Merge operation" merges two classes into one. It is the main operation |
911 | /// to produce non-trivial classes. |
912 | /// If, at some point, we can assume that two symbols from two distinct |
913 | /// classes are equal, we can merge these classes. |
914 | class EquivalenceClass : public llvm::FoldingSetNode { |
915 | public: |
916 | /// Find equivalence class for the given symbol in the given state. |
917 | [[nodiscard]] static inline EquivalenceClass find(ProgramStateRef State, |
918 | SymbolRef Sym); |
919 | |
920 | /// Merge classes for the given symbols and return a new state. |
921 | [[nodiscard]] static inline ProgramStateRef merge(RangeSet::Factory &F, |
922 | ProgramStateRef State, |
923 | SymbolRef First, |
924 | SymbolRef Second); |
925 | // Merge this class with the given class and return a new state. |
926 | [[nodiscard]] inline ProgramStateRef |
927 | merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other); |
928 | |
929 | /// Return a set of class members for the given state. |
930 | [[nodiscard]] inline SymbolSet getClassMembers(ProgramStateRef State) const; |
931 | |
932 | /// Return true if the current class is trivial in the given state. |
933 | /// A class is trivial if and only if there is not any member relations stored |
934 | /// to it in State/ClassMembers. |
935 | /// An equivalence class with one member might seem as it does not hold any |
936 | /// meaningful information, i.e. that is a tautology. However, during the |
937 | /// removal of dead symbols we do not remove classes with one member for |
938 | /// resource and performance reasons. Consequently, a class with one member is |
939 | /// not necessarily trivial. It could happen that we have a class with two |
940 | /// members and then during the removal of dead symbols we remove one of its |
941 | /// members. In this case, the class is still non-trivial (it still has the |
942 | /// mappings in ClassMembers), even though it has only one member. |
943 | [[nodiscard]] inline bool isTrivial(ProgramStateRef State) const; |
944 | |
945 | /// Return true if the current class is trivial and its only member is dead. |
946 | [[nodiscard]] inline bool isTriviallyDead(ProgramStateRef State, |
947 | SymbolReaper &Reaper) const; |
948 | |
949 | [[nodiscard]] static inline ProgramStateRef |
950 | markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First, |
951 | SymbolRef Second); |
952 | [[nodiscard]] static inline ProgramStateRef |
953 | markDisequal(RangeSet::Factory &F, ProgramStateRef State, |
954 | EquivalenceClass First, EquivalenceClass Second); |
955 | [[nodiscard]] inline ProgramStateRef |
956 | markDisequal(RangeSet::Factory &F, ProgramStateRef State, |
957 | EquivalenceClass Other) const; |
958 | [[nodiscard]] static inline ClassSet getDisequalClasses(ProgramStateRef State, |
959 | SymbolRef Sym); |
960 | [[nodiscard]] inline ClassSet getDisequalClasses(ProgramStateRef State) const; |
961 | [[nodiscard]] inline ClassSet |
962 | getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const; |
963 | |
964 | [[nodiscard]] static inline std::optional<bool> |
965 | areEqual(ProgramStateRef State, EquivalenceClass First, |
966 | EquivalenceClass Second); |
967 | [[nodiscard]] static inline std::optional<bool> |
968 | areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second); |
969 | |
970 | /// Remove one member from the class. |
971 | [[nodiscard]] ProgramStateRef removeMember(ProgramStateRef State, |
972 | const SymbolRef Old); |
973 | |
974 | /// Iterate over all symbols and try to simplify them. |
975 | [[nodiscard]] static inline ProgramStateRef simplify(SValBuilder &SVB, |
976 | RangeSet::Factory &F, |
977 | ProgramStateRef State, |
978 | EquivalenceClass Class); |
979 | |
980 | void dumpToStream(ProgramStateRef State, raw_ostream &os) const; |
981 | LLVM_DUMP_METHOD void dump(ProgramStateRef State) const { |
982 | dumpToStream(State, os&: llvm::errs()); |
983 | } |
984 | |
985 | /// Check equivalence data for consistency. |
986 | [[nodiscard]] LLVM_ATTRIBUTE_UNUSED static bool |
987 | isClassDataConsistent(ProgramStateRef State); |
988 | |
989 | [[nodiscard]] QualType getType() const { |
990 | return getRepresentativeSymbol()->getType(); |
991 | } |
992 | |
993 | EquivalenceClass() = delete; |
994 | EquivalenceClass(const EquivalenceClass &) = default; |
995 | EquivalenceClass &operator=(const EquivalenceClass &) = delete; |
996 | EquivalenceClass(EquivalenceClass &&) = default; |
997 | EquivalenceClass &operator=(EquivalenceClass &&) = delete; |
998 | |
999 | bool operator==(const EquivalenceClass &Other) const { |
1000 | return ID == Other.ID; |
1001 | } |
1002 | bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; } |
1003 | bool operator!=(const EquivalenceClass &Other) const { |
1004 | return !operator==(Other); |
1005 | } |
1006 | |
1007 | static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) { |
1008 | ID.AddInteger(I: CID); |
1009 | } |
1010 | |
1011 | void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, CID: this->ID); } |
1012 | |
1013 | private: |
1014 | /* implicit */ EquivalenceClass(SymbolRef Sym) |
1015 | : ID(reinterpret_cast<uintptr_t>(Sym)) {} |
1016 | |
1017 | /// This function is intended to be used ONLY within the class. |
1018 | /// The fact that ID is a pointer to a symbol is an implementation detail |
1019 | /// and should stay that way. |
1020 | /// In the current implementation, we use it to retrieve the only member |
1021 | /// of the trivial class. |
1022 | SymbolRef getRepresentativeSymbol() const { |
1023 | return reinterpret_cast<SymbolRef>(ID); |
1024 | } |
1025 | static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State); |
1026 | |
1027 | inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State, |
1028 | SymbolSet Members, EquivalenceClass Other, |
1029 | SymbolSet OtherMembers); |
1030 | |
1031 | static inline bool |
1032 | addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints, |
1033 | RangeSet::Factory &F, ProgramStateRef State, |
1034 | EquivalenceClass First, EquivalenceClass Second); |
1035 | |
1036 | /// This is a unique identifier of the class. |
1037 | uintptr_t ID; |
1038 | }; |
1039 | |
1040 | //===----------------------------------------------------------------------===// |
1041 | // Constraint functions |
1042 | //===----------------------------------------------------------------------===// |
1043 | |
1044 | [[nodiscard]] LLVM_ATTRIBUTE_UNUSED bool |
1045 | areFeasible(ConstraintRangeTy Constraints) { |
1046 | return llvm::none_of( |
1047 | Range&: Constraints, |
1048 | P: [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) { |
1049 | return ClassConstraint.second.isEmpty(); |
1050 | }); |
1051 | } |
1052 | |
1053 | [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State, |
1054 | EquivalenceClass Class) { |
1055 | return State->get<ConstraintRange>(key: Class); |
1056 | } |
1057 | |
1058 | [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State, |
1059 | SymbolRef Sym) { |
1060 | return getConstraint(State, Class: EquivalenceClass::find(State, Sym)); |
1061 | } |
1062 | |
1063 | [[nodiscard]] ProgramStateRef setConstraint(ProgramStateRef State, |
1064 | EquivalenceClass Class, |
1065 | RangeSet Constraint) { |
1066 | return State->set<ConstraintRange>(K: Class, E: Constraint); |
1067 | } |
1068 | |
1069 | [[nodiscard]] ProgramStateRef setConstraints(ProgramStateRef State, |
1070 | ConstraintRangeTy Constraints) { |
1071 | return State->set<ConstraintRange>(Constraints); |
1072 | } |
1073 | |
1074 | //===----------------------------------------------------------------------===// |
1075 | // Equality/diseqiality abstraction |
1076 | //===----------------------------------------------------------------------===// |
1077 | |
1078 | /// A small helper function for detecting symbolic (dis)equality. |
1079 | /// |
1080 | /// Equality check can have different forms (like a == b or a - b) and this |
1081 | /// class encapsulates those away if the only thing the user wants to check - |
1082 | /// whether it's equality/diseqiality or not. |
1083 | /// |
1084 | /// \returns true if assuming this Sym to be true means equality of operands |
1085 | /// false if it means disequality of operands |
1086 | /// std::nullopt otherwise |
1087 | std::optional<bool> meansEquality(const SymSymExpr *Sym) { |
1088 | switch (Sym->getOpcode()) { |
1089 | case BO_Sub: |
1090 | // This case is: A - B != 0 -> disequality check. |
1091 | return false; |
1092 | case BO_EQ: |
1093 | // This case is: A == B != 0 -> equality check. |
1094 | return true; |
1095 | case BO_NE: |
1096 | // This case is: A != B != 0 -> diseqiality check. |
1097 | return false; |
1098 | default: |
1099 | return std::nullopt; |
1100 | } |
1101 | } |
1102 | |
1103 | //===----------------------------------------------------------------------===// |
1104 | // Intersection functions |
1105 | //===----------------------------------------------------------------------===// |
1106 | |
1107 | template <class SecondTy, class... RestTy> |
1108 | [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, |
1109 | SecondTy Second, RestTy... Tail); |
1110 | |
1111 | template <class... RangeTy> struct IntersectionTraits; |
1112 | |
1113 | template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> { |
1114 | // Found RangeSet, no need to check any further |
1115 | using Type = RangeSet; |
1116 | }; |
1117 | |
1118 | template <> struct IntersectionTraits<> { |
1119 | // We ran out of types, and we didn't find any RangeSet, so the result should |
1120 | // be optional. |
1121 | using Type = std::optional<RangeSet>; |
1122 | }; |
1123 | |
1124 | template <class OptionalOrPointer, class... TailTy> |
1125 | struct IntersectionTraits<OptionalOrPointer, TailTy...> { |
1126 | // If current type is Optional or a raw pointer, we should keep looking. |
1127 | using Type = typename IntersectionTraits<TailTy...>::Type; |
1128 | }; |
1129 | |
1130 | template <class EndTy> |
1131 | [[nodiscard]] inline EndTy intersect(RangeSet::Factory &F, EndTy End) { |
1132 | // If the list contains only RangeSet or std::optional<RangeSet>, simply |
1133 | // return that range set. |
1134 | return End; |
1135 | } |
1136 | |
1137 | [[nodiscard]] LLVM_ATTRIBUTE_UNUSED inline std::optional<RangeSet> |
1138 | intersect(RangeSet::Factory &F, const RangeSet *End) { |
1139 | // This is an extraneous conversion from a raw pointer into |
1140 | // std::optional<RangeSet> |
1141 | if (End) { |
1142 | return *End; |
1143 | } |
1144 | return std::nullopt; |
1145 | } |
1146 | |
1147 | template <class... RestTy> |
1148 | [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, |
1149 | RangeSet Second, RestTy... Tail) { |
1150 | // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version |
1151 | // of the function and can be sure that the result is RangeSet. |
1152 | return intersect(F, F.intersect(LHS: Head, RHS: Second), Tail...); |
1153 | } |
1154 | |
1155 | template <class SecondTy, class... RestTy> |
1156 | [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, |
1157 | SecondTy Second, RestTy... Tail) { |
1158 | if (Second) { |
1159 | // Here we call the <RangeSet,RangeSet,...> version of the function... |
1160 | return intersect(F, Head, *Second, Tail...); |
1161 | } |
1162 | // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which |
1163 | // means that the result is definitely RangeSet. |
1164 | return intersect(F, Head, Tail...); |
1165 | } |
1166 | |
1167 | /// Main generic intersect function. |
1168 | /// It intersects all of the given range sets. If some of the given arguments |
1169 | /// don't hold a range set (nullptr or std::nullopt), the function will skip |
1170 | /// them. |
1171 | /// |
1172 | /// Available representations for the arguments are: |
1173 | /// * RangeSet |
1174 | /// * std::optional<RangeSet> |
1175 | /// * RangeSet * |
1176 | /// Pointer to a RangeSet is automatically assumed to be nullable and will get |
1177 | /// checked as well as the optional version. If this behaviour is undesired, |
1178 | /// please dereference the pointer in the call. |
1179 | /// |
1180 | /// Return type depends on the arguments' types. If we can be sure in compile |
1181 | /// time that there will be a range set as a result, the returning type is |
1182 | /// simply RangeSet, in other cases we have to back off to |
1183 | /// std::optional<RangeSet>. |
1184 | /// |
1185 | /// Please, prefer optional range sets to raw pointers. If the last argument is |
1186 | /// a raw pointer and all previous arguments are std::nullopt, it will cost one |
1187 | /// additional check to convert RangeSet * into std::optional<RangeSet>. |
1188 | template <class HeadTy, class SecondTy, class... RestTy> |
1189 | [[nodiscard]] inline |
1190 | typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type |
1191 | intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second, |
1192 | RestTy... Tail) { |
1193 | if (Head) { |
1194 | return intersect(F, *Head, Second, Tail...); |
1195 | } |
1196 | return intersect(F, Second, Tail...); |
1197 | } |
1198 | |
1199 | //===----------------------------------------------------------------------===// |
1200 | // Symbolic reasoning logic |
1201 | //===----------------------------------------------------------------------===// |
1202 | |
1203 | /// A little component aggregating all of the reasoning we have about |
1204 | /// the ranges of symbolic expressions. |
1205 | /// |
1206 | /// Even when we don't know the exact values of the operands, we still |
1207 | /// can get a pretty good estimate of the result's range. |
1208 | class SymbolicRangeInferrer |
1209 | : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> { |
1210 | public: |
1211 | template <class SourceType> |
1212 | static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State, |
1213 | SourceType Origin) { |
1214 | SymbolicRangeInferrer Inferrer(F, State); |
1215 | return Inferrer.infer(Origin); |
1216 | } |
1217 | |
1218 | RangeSet VisitSymExpr(SymbolRef Sym) { |
1219 | if (std::optional<RangeSet> RS = getRangeForNegatedSym(Sym)) |
1220 | return *RS; |
1221 | // If we've reached this line, the actual type of the symbolic |
1222 | // expression is not supported for advanced inference. |
1223 | // In this case, we simply backoff to the default "let's simply |
1224 | // infer the range from the expression's type". |
1225 | return infer(T: Sym->getType()); |
1226 | } |
1227 | |
1228 | RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) { |
1229 | if (std::optional<RangeSet> RS = getRangeForNegatedUnarySym(USE)) |
1230 | return *RS; |
1231 | return infer(T: USE->getType()); |
1232 | } |
1233 | |
1234 | RangeSet VisitSymIntExpr(const SymIntExpr *Sym) { |
1235 | return VisitBinaryOperator(Sym); |
1236 | } |
1237 | |
1238 | RangeSet VisitIntSymExpr(const IntSymExpr *Sym) { |
1239 | return VisitBinaryOperator(Sym); |
1240 | } |
1241 | |
1242 | RangeSet VisitSymSymExpr(const SymSymExpr *SSE) { |
1243 | return intersect( |
1244 | F&: RangeFactory, |
1245 | // If Sym is a difference of symbols A - B, then maybe we have range |
1246 | // set stored for B - A. |
1247 | // |
1248 | // If we have range set stored for both A - B and B - A then |
1249 | // calculate the effective range set by intersecting the range set |
1250 | // for A - B and the negated range set of B - A. |
1251 | Head: getRangeForNegatedSymSym(SSE), |
1252 | // If Sym is a comparison expression (except <=>), |
1253 | // find any other comparisons with the same operands. |
1254 | // See function description. |
1255 | Second: getRangeForComparisonSymbol(SSE), |
1256 | // If Sym is (dis)equality, we might have some information |
1257 | // on that in our equality classes data structure. |
1258 | Tail: getRangeForEqualities(Sym: SSE), |
1259 | // And we should always check what we can get from the operands. |
1260 | Tail: VisitBinaryOperator(Sym: SSE)); |
1261 | } |
1262 | |
1263 | private: |
1264 | SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S) |
1265 | : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {} |
1266 | |
1267 | /// Infer range information from the given integer constant. |
1268 | /// |
1269 | /// It's not a real "inference", but is here for operating with |
1270 | /// sub-expressions in a more polymorphic manner. |
1271 | RangeSet inferAs(const llvm::APSInt &Val, QualType) { |
1272 | return {RangeFactory, Val}; |
1273 | } |
1274 | |
1275 | /// Infer range information from symbol in the context of the given type. |
1276 | RangeSet inferAs(SymbolRef Sym, QualType DestType) { |
1277 | QualType ActualType = Sym->getType(); |
1278 | // Check that we can reason about the symbol at all. |
1279 | if (ActualType->isIntegralOrEnumerationType() || |
1280 | Loc::isLocType(T: ActualType)) { |
1281 | return infer(Sym); |
1282 | } |
1283 | // Otherwise, let's simply infer from the destination type. |
1284 | // We couldn't figure out nothing else about that expression. |
1285 | return infer(T: DestType); |
1286 | } |
1287 | |
1288 | RangeSet infer(SymbolRef Sym) { |
1289 | return intersect(F&: RangeFactory, |
1290 | // Of course, we should take the constraint directly |
1291 | // associated with this symbol into consideration. |
1292 | Head: getConstraint(State, Sym), |
1293 | // Apart from the Sym itself, we can infer quite a lot if |
1294 | // we look into subexpressions of Sym. |
1295 | Second: Visit(S: Sym)); |
1296 | } |
1297 | |
1298 | RangeSet infer(EquivalenceClass Class) { |
1299 | if (const RangeSet *AssociatedConstraint = getConstraint(State, Class)) |
1300 | return *AssociatedConstraint; |
1301 | |
1302 | return infer(T: Class.getType()); |
1303 | } |
1304 | |
1305 | /// Infer range information solely from the type. |
1306 | RangeSet infer(QualType T) { |
1307 | // Lazily generate a new RangeSet representing all possible values for the |
1308 | // given symbol type. |
1309 | RangeSet Result(RangeFactory, ValueFactory.getMinValue(T), |
1310 | ValueFactory.getMaxValue(T)); |
1311 | |
1312 | // References are known to be non-zero. |
1313 | if (T->isReferenceType()) |
1314 | return assumeNonZero(Domain: Result, T); |
1315 | |
1316 | return Result; |
1317 | } |
1318 | |
1319 | template <class BinarySymExprTy> |
1320 | RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) { |
1321 | // TODO #1: VisitBinaryOperator implementation might not make a good |
1322 | // use of the inferred ranges. In this case, we might be calculating |
1323 | // everything for nothing. This being said, we should introduce some |
1324 | // sort of laziness mechanism here. |
1325 | // |
1326 | // TODO #2: We didn't go into the nested expressions before, so it |
1327 | // might cause us spending much more time doing the inference. |
1328 | // This can be a problem for deeply nested expressions that are |
1329 | // involved in conditions and get tested continuously. We definitely |
1330 | // need to address this issue and introduce some sort of caching |
1331 | // in here. |
1332 | QualType ResultType = Sym->getType(); |
1333 | return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType), |
1334 | Sym->getOpcode(), |
1335 | inferAs(Sym->getRHS(), ResultType), ResultType); |
1336 | } |
1337 | |
1338 | RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op, |
1339 | RangeSet RHS, QualType T); |
1340 | |
1341 | //===----------------------------------------------------------------------===// |
1342 | // Ranges and operators |
1343 | //===----------------------------------------------------------------------===// |
1344 | |
1345 | /// Return a rough approximation of the given range set. |
1346 | /// |
1347 | /// For the range set: |
1348 | /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] } |
1349 | /// it will return the range [x_0, y_N]. |
1350 | static Range fillGaps(RangeSet Origin) { |
1351 | assert(!Origin.isEmpty()); |
1352 | return {Origin.getMinValue(), Origin.getMaxValue()}; |
1353 | } |
1354 | |
1355 | /// Try to convert given range into the given type. |
1356 | /// |
1357 | /// It will return std::nullopt only when the trivial conversion is possible. |
1358 | std::optional<Range> convert(const Range &Origin, APSIntType To) { |
1359 | if (To.testInRange(Val: Origin.From(), AllowMixedSign: false) != APSIntType::RTR_Within || |
1360 | To.testInRange(Val: Origin.To(), AllowMixedSign: false) != APSIntType::RTR_Within) { |
1361 | return std::nullopt; |
1362 | } |
1363 | return Range(ValueFactory.Convert(TargetType: To, From: Origin.From()), |
1364 | ValueFactory.Convert(TargetType: To, From: Origin.To())); |
1365 | } |
1366 | |
1367 | template <BinaryOperator::Opcode Op> |
1368 | RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) { |
1369 | assert(!LHS.isEmpty() && !RHS.isEmpty()); |
1370 | |
1371 | Range CoarseLHS = fillGaps(Origin: LHS); |
1372 | Range CoarseRHS = fillGaps(Origin: RHS); |
1373 | |
1374 | APSIntType ResultType = ValueFactory.getAPSIntType(T); |
1375 | |
1376 | // We need to convert ranges to the resulting type, so we can compare values |
1377 | // and combine them in a meaningful (in terms of the given operation) way. |
1378 | auto ConvertedCoarseLHS = convert(Origin: CoarseLHS, To: ResultType); |
1379 | auto ConvertedCoarseRHS = convert(Origin: CoarseRHS, To: ResultType); |
1380 | |
1381 | // It is hard to reason about ranges when conversion changes |
1382 | // borders of the ranges. |
1383 | if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) { |
1384 | return infer(T); |
1385 | } |
1386 | |
1387 | return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T); |
1388 | } |
1389 | |
1390 | template <BinaryOperator::Opcode Op> |
1391 | RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) { |
1392 | return infer(T); |
1393 | } |
1394 | |
1395 | /// Return a symmetrical range for the given range and type. |
1396 | /// |
1397 | /// If T is signed, return the smallest range [-x..x] that covers the original |
1398 | /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't |
1399 | /// exist due to original range covering min(T)). |
1400 | /// |
1401 | /// If T is unsigned, return the smallest range [0..x] that covers the |
1402 | /// original range. |
1403 | Range getSymmetricalRange(Range Origin, QualType T) { |
1404 | APSIntType RangeType = ValueFactory.getAPSIntType(T); |
1405 | |
1406 | if (RangeType.isUnsigned()) { |
1407 | return Range(ValueFactory.getMinValue(T: RangeType), Origin.To()); |
1408 | } |
1409 | |
1410 | if (Origin.From().isMinSignedValue()) { |
1411 | // If mini is a minimal signed value, absolute value of it is greater |
1412 | // than the maximal signed value. In order to avoid these |
1413 | // complications, we simply return the whole range. |
1414 | return {ValueFactory.getMinValue(T: RangeType), |
1415 | ValueFactory.getMaxValue(T: RangeType)}; |
1416 | } |
1417 | |
1418 | // At this point, we are sure that the type is signed and we can safely |
1419 | // use unary - operator. |
1420 | // |
1421 | // While calculating absolute maximum, we can use the following formula |
1422 | // because of these reasons: |
1423 | // * If From >= 0 then To >= From and To >= -From. |
1424 | // AbsMax == To == max(To, -From) |
1425 | // * If To <= 0 then -From >= -To and -From >= From. |
1426 | // AbsMax == -From == max(-From, To) |
1427 | // * Otherwise, From <= 0, To >= 0, and |
1428 | // AbsMax == max(abs(From), abs(To)) |
1429 | llvm::APSInt AbsMax = std::max(a: -Origin.From(), b: Origin.To()); |
1430 | |
1431 | // Intersection is guaranteed to be non-empty. |
1432 | return {ValueFactory.getValue(X: -AbsMax), ValueFactory.getValue(X: AbsMax)}; |
1433 | } |
1434 | |
1435 | /// Return a range set subtracting zero from \p Domain. |
1436 | RangeSet assumeNonZero(RangeSet Domain, QualType T) { |
1437 | APSIntType IntType = ValueFactory.getAPSIntType(T); |
1438 | return RangeFactory.deletePoint(From: Domain, Point: IntType.getZeroValue()); |
1439 | } |
1440 | |
1441 | template <typename ProduceNegatedSymFunc> |
1442 | std::optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F, |
1443 | QualType T) { |
1444 | // Do not negate if the type cannot be meaningfully negated. |
1445 | if (!T->isUnsignedIntegerOrEnumerationType() && |
1446 | !T->isSignedIntegerOrEnumerationType()) |
1447 | return std::nullopt; |
1448 | |
1449 | if (SymbolRef NegatedSym = F()) |
1450 | if (const RangeSet *NegatedRange = getConstraint(State, Sym: NegatedSym)) |
1451 | return RangeFactory.negate(What: *NegatedRange); |
1452 | |
1453 | return std::nullopt; |
1454 | } |
1455 | |
1456 | std::optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) { |
1457 | // Just get the operand when we negate a symbol that is already negated. |
1458 | // -(-a) == a |
1459 | return getRangeForNegatedExpr( |
1460 | F: [USE]() -> SymbolRef { |
1461 | if (USE->getOpcode() == UO_Minus) |
1462 | return USE->getOperand(); |
1463 | return nullptr; |
1464 | }, |
1465 | T: USE->getType()); |
1466 | } |
1467 | |
1468 | std::optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) { |
1469 | return getRangeForNegatedExpr( |
1470 | [SSE, State = this->State]() -> SymbolRef { |
1471 | if (SSE->getOpcode() == BO_Sub) |
1472 | return State->getSymbolManager().getSymSymExpr( |
1473 | lhs: SSE->getRHS(), op: BO_Sub, rhs: SSE->getLHS(), t: SSE->getType()); |
1474 | return nullptr; |
1475 | }, |
1476 | SSE->getType()); |
1477 | } |
1478 | |
1479 | std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) { |
1480 | return getRangeForNegatedExpr( |
1481 | F: [Sym, State = this->State]() { |
1482 | return State->getSymbolManager().getUnarySymExpr(operand: Sym, op: UO_Minus, |
1483 | t: Sym->getType()); |
1484 | }, |
1485 | T: Sym->getType()); |
1486 | } |
1487 | |
1488 | // Returns ranges only for binary comparison operators (except <=>) |
1489 | // when left and right operands are symbolic values. |
1490 | // Finds any other comparisons with the same operands. |
1491 | // Then do logical calculations and refuse impossible branches. |
1492 | // E.g. (x < y) and (x > y) at the same time are impossible. |
1493 | // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only. |
1494 | // E.g. (x == y) and (y == x) are just reversed but the same. |
1495 | // It covers all possible combinations (see CmpOpTable description). |
1496 | // Note that `x` and `y` can also stand for subexpressions, |
1497 | // not only for actual symbols. |
1498 | std::optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) { |
1499 | const BinaryOperatorKind CurrentOP = SSE->getOpcode(); |
1500 | |
1501 | // We currently do not support <=> (C++20). |
1502 | if (!BinaryOperator::isComparisonOp(Opc: CurrentOP) || (CurrentOP == BO_Cmp)) |
1503 | return std::nullopt; |
1504 | |
1505 | static const OperatorRelationsTable CmpOpTable{}; |
1506 | |
1507 | const SymExpr *LHS = SSE->getLHS(); |
1508 | const SymExpr *RHS = SSE->getRHS(); |
1509 | QualType T = SSE->getType(); |
1510 | |
1511 | SymbolManager &SymMgr = State->getSymbolManager(); |
1512 | |
1513 | // We use this variable to store the last queried operator (`QueriedOP`) |
1514 | // for which the `getCmpOpState` returned with `Unknown`. If there are two |
1515 | // different OPs that returned `Unknown` then we have to query the special |
1516 | // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)` |
1517 | // never returns `Unknown`, so `CurrentOP` is a good initial value. |
1518 | BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP; |
1519 | |
1520 | // Loop goes through all of the columns exept the last one ('UnknownX2'). |
1521 | // We treat `UnknownX2` column separately at the end of the loop body. |
1522 | for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) { |
1523 | |
1524 | // Let's find an expression e.g. (x < y). |
1525 | BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(Index: i); |
1526 | const SymSymExpr *SymSym = SymMgr.getSymSymExpr(lhs: LHS, op: QueriedOP, rhs: RHS, t: T); |
1527 | const RangeSet *QueriedRangeSet = getConstraint(State, SymSym); |
1528 | |
1529 | // If ranges were not previously found, |
1530 | // try to find a reversed expression (y > x). |
1531 | if (!QueriedRangeSet) { |
1532 | const BinaryOperatorKind ROP = |
1533 | BinaryOperator::reverseComparisonOp(Opc: QueriedOP); |
1534 | SymSym = SymMgr.getSymSymExpr(lhs: RHS, op: ROP, rhs: LHS, t: T); |
1535 | QueriedRangeSet = getConstraint(State, SymSym); |
1536 | } |
1537 | |
1538 | if (!QueriedRangeSet || QueriedRangeSet->isEmpty()) |
1539 | continue; |
1540 | |
1541 | const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue(); |
1542 | const bool isInFalseBranch = |
1543 | ConcreteValue ? (*ConcreteValue == 0) : false; |
1544 | |
1545 | // If it is a false branch, we shall be guided by opposite operator, |
1546 | // because the table is made assuming we are in the true branch. |
1547 | // E.g. when (x <= y) is false, then (x > y) is true. |
1548 | if (isInFalseBranch) |
1549 | QueriedOP = BinaryOperator::negateComparisonOp(Opc: QueriedOP); |
1550 | |
1551 | OperatorRelationsTable::TriStateKind BranchState = |
1552 | CmpOpTable.getCmpOpState(CurrentOP, QueriedOP); |
1553 | |
1554 | if (BranchState == OperatorRelationsTable::Unknown) { |
1555 | if (LastQueriedOpToUnknown != CurrentOP && |
1556 | LastQueriedOpToUnknown != QueriedOP) { |
1557 | // If we got the Unknown state for both different operators. |
1558 | // if (x <= y) // assume true |
1559 | // if (x != y) // assume true |
1560 | // if (x < y) // would be also true |
1561 | // Get a state from `UnknownX2` column. |
1562 | BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP); |
1563 | } else { |
1564 | LastQueriedOpToUnknown = QueriedOP; |
1565 | continue; |
1566 | } |
1567 | } |
1568 | |
1569 | return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T) |
1570 | : getFalseRange(T); |
1571 | } |
1572 | |
1573 | return std::nullopt; |
1574 | } |
1575 | |
1576 | std::optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) { |
1577 | std::optional<bool> Equality = meansEquality(Sym); |
1578 | |
1579 | if (!Equality) |
1580 | return std::nullopt; |
1581 | |
1582 | if (std::optional<bool> AreEqual = |
1583 | EquivalenceClass::areEqual(State, First: Sym->getLHS(), Second: Sym->getRHS())) { |
1584 | // Here we cover two cases at once: |
1585 | // * if Sym is equality and its operands are known to be equal -> true |
1586 | // * if Sym is disequality and its operands are disequal -> true |
1587 | if (*AreEqual == *Equality) { |
1588 | return getTrueRange(T: Sym->getType()); |
1589 | } |
1590 | // Opposite combinations result in false. |
1591 | return getFalseRange(T: Sym->getType()); |
1592 | } |
1593 | |
1594 | return std::nullopt; |
1595 | } |
1596 | |
1597 | RangeSet getTrueRange(QualType T) { |
1598 | RangeSet TypeRange = infer(T); |
1599 | return assumeNonZero(Domain: TypeRange, T); |
1600 | } |
1601 | |
1602 | RangeSet getFalseRange(QualType T) { |
1603 | const llvm::APSInt &Zero = ValueFactory.getValue(X: 0, T); |
1604 | return RangeSet(RangeFactory, Zero); |
1605 | } |
1606 | |
1607 | BasicValueFactory &ValueFactory; |
1608 | RangeSet::Factory &RangeFactory; |
1609 | ProgramStateRef State; |
1610 | }; |
1611 | |
1612 | //===----------------------------------------------------------------------===// |
1613 | // Range-based reasoning about symbolic operations |
1614 | //===----------------------------------------------------------------------===// |
1615 | |
1616 | template <> |
1617 | RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_NE>(RangeSet LHS, |
1618 | RangeSet RHS, |
1619 | QualType T) { |
1620 | assert(!LHS.isEmpty() && !RHS.isEmpty()); |
1621 | |
1622 | if (LHS.getAPSIntType() == RHS.getAPSIntType()) { |
1623 | if (intersect(F&: RangeFactory, Head: LHS, Second: RHS).isEmpty()) |
1624 | return getTrueRange(T); |
1625 | |
1626 | } else { |
1627 | // We can only lose information if we are casting smaller signed type to |
1628 | // bigger unsigned type. For e.g., |
1629 | // LHS (unsigned short): [2, USHRT_MAX] |
1630 | // RHS (signed short): [SHRT_MIN, 0] |
1631 | // |
1632 | // Casting RHS to LHS type will leave us with overlapping values |
1633 | // CastedRHS : [0, 0] U [SHRT_MAX + 1, USHRT_MAX] |
1634 | // |
1635 | // We can avoid this by checking if signed type's maximum value is lesser |
1636 | // than unsigned type's minimum value. |
1637 | |
1638 | // If both have different signs then only we can get more information. |
1639 | if (LHS.isUnsigned() != RHS.isUnsigned()) { |
1640 | if (LHS.isUnsigned() && (LHS.getBitWidth() >= RHS.getBitWidth())) { |
1641 | if (RHS.getMaxValue().isNegative() || |
1642 | LHS.getAPSIntType().convert(Value: RHS.getMaxValue()) < LHS.getMinValue()) |
1643 | return getTrueRange(T); |
1644 | |
1645 | } else if (RHS.isUnsigned() && (LHS.getBitWidth() <= RHS.getBitWidth())) { |
1646 | if (LHS.getMaxValue().isNegative() || |
1647 | RHS.getAPSIntType().convert(Value: LHS.getMaxValue()) < RHS.getMinValue()) |
1648 | return getTrueRange(T); |
1649 | } |
1650 | } |
1651 | |
1652 | // Both RangeSets should be casted to bigger unsigned type. |
1653 | APSIntType CastingType(std::max(a: LHS.getBitWidth(), b: RHS.getBitWidth()), |
1654 | LHS.isUnsigned() || RHS.isUnsigned()); |
1655 | |
1656 | RangeSet CastedLHS = RangeFactory.castTo(What: LHS, Ty: CastingType); |
1657 | RangeSet CastedRHS = RangeFactory.castTo(What: RHS, Ty: CastingType); |
1658 | |
1659 | if (intersect(F&: RangeFactory, Head: CastedLHS, Second: CastedRHS).isEmpty()) |
1660 | return getTrueRange(T); |
1661 | } |
1662 | |
1663 | // In all other cases, the resulting range cannot be deduced. |
1664 | return infer(T); |
1665 | } |
1666 | |
1667 | template <> |
1668 | RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS, |
1669 | QualType T) { |
1670 | APSIntType ResultType = ValueFactory.getAPSIntType(T); |
1671 | llvm::APSInt Zero = ResultType.getZeroValue(); |
1672 | |
1673 | bool IsLHSPositiveOrZero = LHS.From() >= Zero; |
1674 | bool IsRHSPositiveOrZero = RHS.From() >= Zero; |
1675 | |
1676 | bool IsLHSNegative = LHS.To() < Zero; |
1677 | bool IsRHSNegative = RHS.To() < Zero; |
1678 | |
1679 | // Check if both ranges have the same sign. |
1680 | if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) || |
1681 | (IsLHSNegative && IsRHSNegative)) { |
1682 | // The result is definitely greater or equal than any of the operands. |
1683 | const llvm::APSInt &Min = std::max(a: LHS.From(), b: RHS.From()); |
1684 | |
1685 | // We estimate maximal value for positives as the maximal value for the |
1686 | // given type. For negatives, we estimate it with -1 (e.g. 0x11111111). |
1687 | // |
1688 | // TODO: We basically, limit the resulting range from below, but don't do |
1689 | // anything with the upper bound. |
1690 | // |
1691 | // For positive operands, it can be done as follows: for the upper |
1692 | // bound of LHS and RHS we calculate the most significant bit set. |
1693 | // Let's call it the N-th bit. Then we can estimate the maximal |
1694 | // number to be 2^(N+1)-1, i.e. the number with all the bits up to |
1695 | // the N-th bit set. |
1696 | const llvm::APSInt &Max = IsLHSNegative |
1697 | ? ValueFactory.getValue(X: --Zero) |
1698 | : ValueFactory.getMaxValue(T: ResultType); |
1699 | |
1700 | return {RangeFactory, ValueFactory.getValue(X: Min), Max}; |
1701 | } |
1702 | |
1703 | // Otherwise, let's check if at least one of the operands is negative. |
1704 | if (IsLHSNegative || IsRHSNegative) { |
1705 | // This means that the result is definitely negative as well. |
1706 | return {RangeFactory, ValueFactory.getMinValue(T: ResultType), |
1707 | ValueFactory.getValue(X: --Zero)}; |
1708 | } |
1709 | |
1710 | RangeSet DefaultRange = infer(T); |
1711 | |
1712 | // It is pretty hard to reason about operands with different signs |
1713 | // (and especially with possibly different signs). We simply check if it |
1714 | // can be zero. In order to conclude that the result could not be zero, |
1715 | // at least one of the operands should be definitely not zero itself. |
1716 | if (!LHS.Includes(Point: Zero) || !RHS.Includes(Point: Zero)) { |
1717 | return assumeNonZero(Domain: DefaultRange, T); |
1718 | } |
1719 | |
1720 | // Nothing much else to do here. |
1721 | return DefaultRange; |
1722 | } |
1723 | |
1724 | template <> |
1725 | RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS, |
1726 | Range RHS, |
1727 | QualType T) { |
1728 | APSIntType ResultType = ValueFactory.getAPSIntType(T); |
1729 | llvm::APSInt Zero = ResultType.getZeroValue(); |
1730 | |
1731 | bool IsLHSPositiveOrZero = LHS.From() >= Zero; |
1732 | bool IsRHSPositiveOrZero = RHS.From() >= Zero; |
1733 | |
1734 | bool IsLHSNegative = LHS.To() < Zero; |
1735 | bool IsRHSNegative = RHS.To() < Zero; |
1736 | |
1737 | // Check if both ranges have the same sign. |
1738 | if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) || |
1739 | (IsLHSNegative && IsRHSNegative)) { |
1740 | // The result is definitely less or equal than any of the operands. |
1741 | const llvm::APSInt &Max = std::min(a: LHS.To(), b: RHS.To()); |
1742 | |
1743 | // We conservatively estimate lower bound to be the smallest positive |
1744 | // or negative value corresponding to the sign of the operands. |
1745 | const llvm::APSInt &Min = IsLHSNegative |
1746 | ? ValueFactory.getMinValue(T: ResultType) |
1747 | : ValueFactory.getValue(X: Zero); |
1748 | |
1749 | return {RangeFactory, Min, Max}; |
1750 | } |
1751 | |
1752 | // Otherwise, let's check if at least one of the operands is positive. |
1753 | if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) { |
1754 | // This makes result definitely positive. |
1755 | // |
1756 | // We can also reason about a maximal value by finding the maximal |
1757 | // value of the positive operand. |
1758 | const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To(); |
1759 | |
1760 | // The minimal value on the other hand is much harder to reason about. |
1761 | // The only thing we know for sure is that the result is positive. |
1762 | return {RangeFactory, ValueFactory.getValue(X: Zero), |
1763 | ValueFactory.getValue(X: Max)}; |
1764 | } |
1765 | |
1766 | // Nothing much else to do here. |
1767 | return infer(T); |
1768 | } |
1769 | |
1770 | template <> |
1771 | RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS, |
1772 | Range RHS, |
1773 | QualType T) { |
1774 | llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue(); |
1775 | |
1776 | Range ConservativeRange = getSymmetricalRange(Origin: RHS, T); |
1777 | |
1778 | llvm::APSInt Max = ConservativeRange.To(); |
1779 | llvm::APSInt Min = ConservativeRange.From(); |
1780 | |
1781 | if (Max == Zero) { |
1782 | // It's an undefined behaviour to divide by 0 and it seems like we know |
1783 | // for sure that RHS is 0. Let's say that the resulting range is |
1784 | // simply infeasible for that matter. |
1785 | return RangeFactory.getEmptySet(); |
1786 | } |
1787 | |
1788 | // At this point, our conservative range is closed. The result, however, |
1789 | // couldn't be greater than the RHS' maximal absolute value. Because of |
1790 | // this reason, we turn the range into open (or half-open in case of |
1791 | // unsigned integers). |
1792 | // |
1793 | // While we operate on integer values, an open interval (a, b) can be easily |
1794 | // represented by the closed interval [a + 1, b - 1]. And this is exactly |
1795 | // what we do next. |
1796 | // |
1797 | // If we are dealing with unsigned case, we shouldn't move the lower bound. |
1798 | if (Min.isSigned()) { |
1799 | ++Min; |
1800 | } |
1801 | --Max; |
1802 | |
1803 | bool IsLHSPositiveOrZero = LHS.From() >= Zero; |
1804 | bool IsRHSPositiveOrZero = RHS.From() >= Zero; |
1805 | |
1806 | // Remainder operator results with negative operands is implementation |
1807 | // defined. Positive cases are much easier to reason about though. |
1808 | if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) { |
1809 | // If maximal value of LHS is less than maximal value of RHS, |
1810 | // the result won't get greater than LHS.To(). |
1811 | Max = std::min(a: LHS.To(), b: Max); |
1812 | // We want to check if it is a situation similar to the following: |
1813 | // |
1814 | // <------------|---[ LHS ]--------[ RHS ]-----> |
1815 | // -INF 0 +INF |
1816 | // |
1817 | // In this situation, we can conclude that (LHS / RHS) == 0 and |
1818 | // (LHS % RHS) == LHS. |
1819 | Min = LHS.To() < RHS.From() ? LHS.From() : Zero; |
1820 | } |
1821 | |
1822 | // Nevertheless, the symmetrical range for RHS is a conservative estimate |
1823 | // for any sign of either LHS, or RHS. |
1824 | return {RangeFactory, ValueFactory.getValue(X: Min), ValueFactory.getValue(X: Max)}; |
1825 | } |
1826 | |
1827 | RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS, |
1828 | BinaryOperator::Opcode Op, |
1829 | RangeSet RHS, QualType T) { |
1830 | // We should propagate information about unfeasbility of one of the |
1831 | // operands to the resulting range. |
1832 | if (LHS.isEmpty() || RHS.isEmpty()) { |
1833 | return RangeFactory.getEmptySet(); |
1834 | } |
1835 | |
1836 | switch (Op) { |
1837 | case BO_NE: |
1838 | return VisitBinaryOperator<BO_NE>(LHS, RHS, T); |
1839 | case BO_Or: |
1840 | return VisitBinaryOperator<BO_Or>(LHS, RHS, T); |
1841 | case BO_And: |
1842 | return VisitBinaryOperator<BO_And>(LHS, RHS, T); |
1843 | case BO_Rem: |
1844 | return VisitBinaryOperator<BO_Rem>(LHS, RHS, T); |
1845 | default: |
1846 | return infer(T); |
1847 | } |
1848 | } |
1849 | |
1850 | //===----------------------------------------------------------------------===// |
1851 | // Constraint manager implementation details |
1852 | //===----------------------------------------------------------------------===// |
1853 | |
1854 | class RangeConstraintManager : public RangedConstraintManager { |
1855 | public: |
1856 | RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) |
1857 | : RangedConstraintManager(EE, SVB), F(getBasicVals()) {} |
1858 | |
1859 | //===------------------------------------------------------------------===// |
1860 | // Implementation for interface from ConstraintManager. |
1861 | //===------------------------------------------------------------------===// |
1862 | |
1863 | bool haveEqualConstraints(ProgramStateRef S1, |
1864 | ProgramStateRef S2) const override { |
1865 | // NOTE: ClassMembers are as simple as back pointers for ClassMap, |
1866 | // so comparing constraint ranges and class maps should be |
1867 | // sufficient. |
1868 | return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() && |
1869 | S1->get<ClassMap>() == S2->get<ClassMap>(); |
1870 | } |
1871 | |
1872 | bool canReasonAbout(SVal X) const override; |
1873 | |
1874 | ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override; |
1875 | |
1876 | const llvm::APSInt *getSymVal(ProgramStateRef State, |
1877 | SymbolRef Sym) const override; |
1878 | |
1879 | const llvm::APSInt *getSymMinVal(ProgramStateRef State, |
1880 | SymbolRef Sym) const override; |
1881 | |
1882 | const llvm::APSInt *getSymMaxVal(ProgramStateRef State, |
1883 | SymbolRef Sym) const override; |
1884 | |
1885 | ProgramStateRef removeDeadBindings(ProgramStateRef State, |
1886 | SymbolReaper &SymReaper) override; |
1887 | |
1888 | void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n" , |
1889 | unsigned int Space = 0, bool IsDot = false) const override; |
1890 | void printValue(raw_ostream &Out, ProgramStateRef State, |
1891 | SymbolRef Sym) override; |
1892 | void printConstraints(raw_ostream &Out, ProgramStateRef State, |
1893 | const char *NL = "\n" , unsigned int Space = 0, |
1894 | bool IsDot = false) const; |
1895 | void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State, |
1896 | const char *NL = "\n" , unsigned int Space = 0, |
1897 | bool IsDot = false) const; |
1898 | void printDisequalities(raw_ostream &Out, ProgramStateRef State, |
1899 | const char *NL = "\n" , unsigned int Space = 0, |
1900 | bool IsDot = false) const; |
1901 | |
1902 | //===------------------------------------------------------------------===// |
1903 | // Implementation for interface from RangedConstraintManager. |
1904 | //===------------------------------------------------------------------===// |
1905 | |
1906 | ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym, |
1907 | const llvm::APSInt &V, |
1908 | const llvm::APSInt &Adjustment) override; |
1909 | |
1910 | ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym, |
1911 | const llvm::APSInt &V, |
1912 | const llvm::APSInt &Adjustment) override; |
1913 | |
1914 | ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym, |
1915 | const llvm::APSInt &V, |
1916 | const llvm::APSInt &Adjustment) override; |
1917 | |
1918 | ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym, |
1919 | const llvm::APSInt &V, |
1920 | const llvm::APSInt &Adjustment) override; |
1921 | |
1922 | ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym, |
1923 | const llvm::APSInt &V, |
1924 | const llvm::APSInt &Adjustment) override; |
1925 | |
1926 | ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym, |
1927 | const llvm::APSInt &V, |
1928 | const llvm::APSInt &Adjustment) override; |
1929 | |
1930 | ProgramStateRef assumeSymWithinInclusiveRange( |
1931 | ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, |
1932 | const llvm::APSInt &To, const llvm::APSInt &Adjustment) override; |
1933 | |
1934 | ProgramStateRef assumeSymOutsideInclusiveRange( |
1935 | ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, |
1936 | const llvm::APSInt &To, const llvm::APSInt &Adjustment) override; |
1937 | |
1938 | private: |
1939 | RangeSet::Factory F; |
1940 | |
1941 | RangeSet getRange(ProgramStateRef State, SymbolRef Sym); |
1942 | RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); |
1943 | ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, |
1944 | RangeSet Range); |
1945 | ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, |
1946 | RangeSet Range); |
1947 | |
1948 | RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, |
1949 | const llvm::APSInt &Int, |
1950 | const llvm::APSInt &Adjustment); |
1951 | RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym, |
1952 | const llvm::APSInt &Int, |
1953 | const llvm::APSInt &Adjustment); |
1954 | RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym, |
1955 | const llvm::APSInt &Int, |
1956 | const llvm::APSInt &Adjustment); |
1957 | RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS, |
1958 | const llvm::APSInt &Int, |
1959 | const llvm::APSInt &Adjustment); |
1960 | RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym, |
1961 | const llvm::APSInt &Int, |
1962 | const llvm::APSInt &Adjustment); |
1963 | }; |
1964 | |
1965 | //===----------------------------------------------------------------------===// |
1966 | // Constraint assignment logic |
1967 | //===----------------------------------------------------------------------===// |
1968 | |
1969 | /// ConstraintAssignorBase is a small utility class that unifies visitor |
1970 | /// for ranges with a visitor for constraints (rangeset/range/constant). |
1971 | /// |
1972 | /// It is designed to have one derived class, but generally it can have more. |
1973 | /// Derived class can control which types we handle by defining methods of the |
1974 | /// following form: |
1975 | /// |
1976 | /// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym, |
1977 | /// CONSTRAINT Constraint); |
1978 | /// |
1979 | /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.) |
1980 | /// CONSTRAINT is the type of constraint (RangeSet/Range/Const) |
1981 | /// return value signifies whether we should try other handle methods |
1982 | /// (i.e. false would mean to stop right after calling this method) |
1983 | template <class Derived> class ConstraintAssignorBase { |
1984 | public: |
1985 | using Const = const llvm::APSInt &; |
1986 | |
1987 | #define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint) |
1988 | |
1989 | #define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \ |
1990 | if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \ |
1991 | return false |
1992 | |
1993 | void assign(SymbolRef Sym, RangeSet Constraint) { |
1994 | assignImpl(Sym, Constraint); |
1995 | } |
1996 | |
1997 | bool assignImpl(SymbolRef Sym, RangeSet Constraint) { |
1998 | switch (Sym->getKind()) { |
1999 | #define SYMBOL(Id, Parent) \ |
2000 | case SymExpr::Id##Kind: \ |
2001 | DISPATCH(Id); |
2002 | #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def" |
2003 | } |
2004 | llvm_unreachable("Unknown SymExpr kind!" ); |
2005 | } |
2006 | |
2007 | #define DEFAULT_ASSIGN(Id) \ |
2008 | bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \ |
2009 | return true; \ |
2010 | } \ |
2011 | bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \ |
2012 | bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; } |
2013 | |
2014 | // When we dispatch for constraint types, we first try to check |
2015 | // if the new constraint is the constant and try the corresponding |
2016 | // assignor methods. If it didn't interrupt, we can proceed to the |
2017 | // range, and finally to the range set. |
2018 | #define CONSTRAINT_DISPATCH(Id) \ |
2019 | if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \ |
2020 | ASSIGN(Id, Const, Sym, *Const); \ |
2021 | } \ |
2022 | if (Constraint.size() == 1) { \ |
2023 | ASSIGN(Id, Range, Sym, *Constraint.begin()); \ |
2024 | } \ |
2025 | ASSIGN(Id, RangeSet, Sym, Constraint) |
2026 | |
2027 | // Our internal assign method first tries to call assignor methods for all |
2028 | // constraint types that apply. And if not interrupted, continues with its |
2029 | // parent class. |
2030 | #define SYMBOL(Id, Parent) \ |
2031 | bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \ |
2032 | CONSTRAINT_DISPATCH(Id); \ |
2033 | DISPATCH(Parent); \ |
2034 | } \ |
2035 | DEFAULT_ASSIGN(Id) |
2036 | #define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent) |
2037 | #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def" |
2038 | |
2039 | // Default implementations for the top class that doesn't have parents. |
2040 | bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) { |
2041 | CONSTRAINT_DISPATCH(SymExpr); |
2042 | return true; |
2043 | } |
2044 | DEFAULT_ASSIGN(SymExpr); |
2045 | |
2046 | #undef DISPATCH |
2047 | #undef CONSTRAINT_DISPATCH |
2048 | #undef DEFAULT_ASSIGN |
2049 | #undef ASSIGN |
2050 | }; |
2051 | |
2052 | /// A little component aggregating all of the reasoning we have about |
2053 | /// assigning new constraints to symbols. |
2054 | /// |
2055 | /// The main purpose of this class is to associate constraints to symbols, |
2056 | /// and impose additional constraints on other symbols, when we can imply |
2057 | /// them. |
2058 | /// |
2059 | /// It has a nice symmetry with SymbolicRangeInferrer. When the latter |
2060 | /// can provide more precise ranges by looking into the operands of the |
2061 | /// expression in question, ConstraintAssignor looks into the operands |
2062 | /// to see if we can imply more from the new constraint. |
2063 | class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> { |
2064 | public: |
2065 | template <class ClassOrSymbol> |
2066 | [[nodiscard]] static ProgramStateRef |
2067 | assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F, |
2068 | ClassOrSymbol CoS, RangeSet NewConstraint) { |
2069 | if (!State || NewConstraint.isEmpty()) |
2070 | return nullptr; |
2071 | |
2072 | ConstraintAssignor Assignor{State, Builder, F}; |
2073 | return Assignor.assign(CoS, NewConstraint); |
2074 | } |
2075 | |
2076 | /// Handle expressions like: a % b != 0. |
2077 | template <typename SymT> |
2078 | bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) { |
2079 | if (Sym->getOpcode() != BO_Rem) |
2080 | return true; |
2081 | // a % b != 0 implies that a != 0. |
2082 | if (!Constraint.containsZero()) { |
2083 | SVal SymSVal = Builder.makeSymbolVal(Sym: Sym->getLHS()); |
2084 | if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) { |
2085 | State = State->assume(Cond: *NonLocSymSVal, Assumption: true); |
2086 | if (!State) |
2087 | return false; |
2088 | } |
2089 | } |
2090 | return true; |
2091 | } |
2092 | |
2093 | inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint); |
2094 | inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym, |
2095 | RangeSet Constraint) { |
2096 | return handleRemainderOp(Sym, Constraint); |
2097 | } |
2098 | inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym, |
2099 | RangeSet Constraint); |
2100 | |
2101 | private: |
2102 | ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder, |
2103 | RangeSet::Factory &F) |
2104 | : State(State), Builder(Builder), RangeFactory(F) {} |
2105 | using Base = ConstraintAssignorBase<ConstraintAssignor>; |
2106 | |
2107 | /// Base method for handling new constraints for symbols. |
2108 | [[nodiscard]] ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) { |
2109 | // All constraints are actually associated with equivalence classes, and |
2110 | // that's what we are going to do first. |
2111 | State = assign(Class: EquivalenceClass::find(State, Sym), NewConstraint); |
2112 | if (!State) |
2113 | return nullptr; |
2114 | |
2115 | // And after that we can check what other things we can get from this |
2116 | // constraint. |
2117 | Base::assign(Sym, Constraint: NewConstraint); |
2118 | return State; |
2119 | } |
2120 | |
2121 | /// Base method for handling new constraints for classes. |
2122 | [[nodiscard]] ProgramStateRef assign(EquivalenceClass Class, |
2123 | RangeSet NewConstraint) { |
2124 | // There is a chance that we might need to update constraints for the |
2125 | // classes that are known to be disequal to Class. |
2126 | // |
2127 | // In order for this to be even possible, the new constraint should |
2128 | // be simply a constant because we can't reason about range disequalities. |
2129 | if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) { |
2130 | |
2131 | ConstraintRangeTy Constraints = State->get<ConstraintRange>(); |
2132 | ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>(); |
2133 | |
2134 | // Add new constraint. |
2135 | Constraints = CF.add(Old: Constraints, K: Class, D: NewConstraint); |
2136 | |
2137 | for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) { |
2138 | RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange( |
2139 | F&: RangeFactory, State, Origin: DisequalClass); |
2140 | |
2141 | UpdatedConstraint = RangeFactory.deletePoint(From: UpdatedConstraint, Point: *Point); |
2142 | |
2143 | // If we end up with at least one of the disequal classes to be |
2144 | // constrained with an empty range-set, the state is infeasible. |
2145 | if (UpdatedConstraint.isEmpty()) |
2146 | return nullptr; |
2147 | |
2148 | Constraints = CF.add(Old: Constraints, K: DisequalClass, D: UpdatedConstraint); |
2149 | } |
2150 | assert(areFeasible(Constraints) && "Constraint manager shouldn't produce " |
2151 | "a state with infeasible constraints" ); |
2152 | |
2153 | return setConstraints(State, Constraints); |
2154 | } |
2155 | |
2156 | return setConstraint(State, Class, Constraint: NewConstraint); |
2157 | } |
2158 | |
2159 | ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS, |
2160 | SymbolRef RHS) { |
2161 | return EquivalenceClass::markDisequal(F&: RangeFactory, State, First: LHS, Second: RHS); |
2162 | } |
2163 | |
2164 | ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS, |
2165 | SymbolRef RHS) { |
2166 | return EquivalenceClass::merge(F&: RangeFactory, State, First: LHS, Second: RHS); |
2167 | } |
2168 | |
2169 | [[nodiscard]] std::optional<bool> interpreteAsBool(RangeSet Constraint) { |
2170 | assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here" ); |
2171 | |
2172 | if (Constraint.getConcreteValue()) |
2173 | return !Constraint.getConcreteValue()->isZero(); |
2174 | |
2175 | if (!Constraint.containsZero()) |
2176 | return true; |
2177 | |
2178 | return std::nullopt; |
2179 | } |
2180 | |
2181 | ProgramStateRef State; |
2182 | SValBuilder &Builder; |
2183 | RangeSet::Factory &RangeFactory; |
2184 | }; |
2185 | |
2186 | bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym, |
2187 | const llvm::APSInt &Constraint) { |
2188 | llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses; |
2189 | // Iterate over all equivalence classes and try to simplify them. |
2190 | ClassMembersTy Members = State->get<ClassMembers>(); |
2191 | for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) { |
2192 | EquivalenceClass Class = ClassToSymbolSet.first; |
2193 | State = EquivalenceClass::simplify(SVB&: Builder, F&: RangeFactory, State, Class); |
2194 | if (!State) |
2195 | return false; |
2196 | SimplifiedClasses.insert(V: Class); |
2197 | } |
2198 | |
2199 | // Trivial equivalence classes (those that have only one symbol member) are |
2200 | // not stored in the State. Thus, we must skim through the constraints as |
2201 | // well. And we try to simplify symbols in the constraints. |
2202 | ConstraintRangeTy Constraints = State->get<ConstraintRange>(); |
2203 | for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) { |
2204 | EquivalenceClass Class = ClassConstraint.first; |
2205 | if (SimplifiedClasses.count(V: Class)) // Already simplified. |
2206 | continue; |
2207 | State = EquivalenceClass::simplify(SVB&: Builder, F&: RangeFactory, State, Class); |
2208 | if (!State) |
2209 | return false; |
2210 | } |
2211 | |
2212 | // We may have trivial equivalence classes in the disequality info as |
2213 | // well, and we need to simplify them. |
2214 | DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>(); |
2215 | for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry : |
2216 | DisequalityInfo) { |
2217 | EquivalenceClass Class = DisequalityEntry.first; |
2218 | ClassSet DisequalClasses = DisequalityEntry.second; |
2219 | State = EquivalenceClass::simplify(SVB&: Builder, F&: RangeFactory, State, Class); |
2220 | if (!State) |
2221 | return false; |
2222 | } |
2223 | |
2224 | return true; |
2225 | } |
2226 | |
2227 | bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym, |
2228 | RangeSet Constraint) { |
2229 | if (!handleRemainderOp(Sym, Constraint)) |
2230 | return false; |
2231 | |
2232 | std::optional<bool> ConstraintAsBool = interpreteAsBool(Constraint); |
2233 | |
2234 | if (!ConstraintAsBool) |
2235 | return true; |
2236 | |
2237 | if (std::optional<bool> Equality = meansEquality(Sym)) { |
2238 | // Here we cover two cases: |
2239 | // * if Sym is equality and the new constraint is true -> Sym's operands |
2240 | // should be marked as equal |
2241 | // * if Sym is disequality and the new constraint is false -> Sym's |
2242 | // operands should be also marked as equal |
2243 | if (*Equality == *ConstraintAsBool) { |
2244 | State = trackEquality(State, LHS: Sym->getLHS(), RHS: Sym->getRHS()); |
2245 | } else { |
2246 | // Other combinations leave as with disequal operands. |
2247 | State = trackDisequality(State, LHS: Sym->getLHS(), RHS: Sym->getRHS()); |
2248 | } |
2249 | |
2250 | if (!State) |
2251 | return false; |
2252 | } |
2253 | |
2254 | return true; |
2255 | } |
2256 | |
2257 | } // end anonymous namespace |
2258 | |
2259 | std::unique_ptr<ConstraintManager> |
2260 | ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, |
2261 | ExprEngine *Eng) { |
2262 | return std::make_unique<RangeConstraintManager>(args&: Eng, args&: StMgr.getSValBuilder()); |
2263 | } |
2264 | |
2265 | ConstraintMap ento::getConstraintMap(ProgramStateRef State) { |
2266 | ConstraintMap::Factory &F = State->get_context<ConstraintMap>(); |
2267 | ConstraintMap Result = F.getEmptyMap(); |
2268 | |
2269 | ConstraintRangeTy Constraints = State->get<ConstraintRange>(); |
2270 | for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) { |
2271 | EquivalenceClass Class = ClassConstraint.first; |
2272 | SymbolSet ClassMembers = Class.getClassMembers(State); |
2273 | assert(!ClassMembers.isEmpty() && |
2274 | "Class must always have at least one member!" ); |
2275 | |
2276 | SymbolRef Representative = *ClassMembers.begin(); |
2277 | Result = F.add(Old: Result, K: Representative, D: ClassConstraint.second); |
2278 | } |
2279 | |
2280 | return Result; |
2281 | } |
2282 | |
2283 | //===----------------------------------------------------------------------===// |
2284 | // EqualityClass implementation details |
2285 | //===----------------------------------------------------------------------===// |
2286 | |
2287 | LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State, |
2288 | raw_ostream &os) const { |
2289 | SymbolSet ClassMembers = getClassMembers(State); |
2290 | for (const SymbolRef &MemberSym : ClassMembers) { |
2291 | MemberSym->dump(); |
2292 | os << "\n" ; |
2293 | } |
2294 | } |
2295 | |
2296 | inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State, |
2297 | SymbolRef Sym) { |
2298 | assert(State && "State should not be null" ); |
2299 | assert(Sym && "Symbol should not be null" ); |
2300 | // We store far from all Symbol -> Class mappings |
2301 | if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(key: Sym)) |
2302 | return *NontrivialClass; |
2303 | |
2304 | // This is a trivial class of Sym. |
2305 | return Sym; |
2306 | } |
2307 | |
2308 | inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F, |
2309 | ProgramStateRef State, |
2310 | SymbolRef First, |
2311 | SymbolRef Second) { |
2312 | EquivalenceClass FirstClass = find(State, Sym: First); |
2313 | EquivalenceClass SecondClass = find(State, Sym: Second); |
2314 | |
2315 | return FirstClass.merge(F, State, Other: SecondClass); |
2316 | } |
2317 | |
2318 | inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F, |
2319 | ProgramStateRef State, |
2320 | EquivalenceClass Other) { |
2321 | // It is already the same class. |
2322 | if (*this == Other) |
2323 | return State; |
2324 | |
2325 | // FIXME: As of now, we support only equivalence classes of the same type. |
2326 | // This limitation is connected to the lack of explicit casts in |
2327 | // our symbolic expression model. |
2328 | // |
2329 | // That means that for `int x` and `char y` we don't distinguish |
2330 | // between these two very different cases: |
2331 | // * `x == y` |
2332 | // * `(char)x == y` |
2333 | // |
2334 | // The moment we introduce symbolic casts, this restriction can be |
2335 | // lifted. |
2336 | if (getType() != Other.getType()) |
2337 | return State; |
2338 | |
2339 | SymbolSet Members = getClassMembers(State); |
2340 | SymbolSet OtherMembers = Other.getClassMembers(State); |
2341 | |
2342 | // We estimate the size of the class by the height of tree containing |
2343 | // its members. Merging is not a trivial operation, so it's easier to |
2344 | // merge the smaller class into the bigger one. |
2345 | if (Members.getHeight() >= OtherMembers.getHeight()) { |
2346 | return mergeImpl(F, State, Members, Other, OtherMembers); |
2347 | } else { |
2348 | return Other.mergeImpl(F, State, Members: OtherMembers, Other: *this, OtherMembers: Members); |
2349 | } |
2350 | } |
2351 | |
2352 | inline ProgramStateRef |
2353 | EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory, |
2354 | ProgramStateRef State, SymbolSet MyMembers, |
2355 | EquivalenceClass Other, SymbolSet OtherMembers) { |
2356 | // Essentially what we try to recreate here is some kind of union-find |
2357 | // data structure. It does have certain limitations due to persistence |
2358 | // and the need to remove elements from classes. |
2359 | // |
2360 | // In this setting, EquialityClass object is the representative of the class |
2361 | // or the parent element. ClassMap is a mapping of class members to their |
2362 | // parent. Unlike the union-find structure, they all point directly to the |
2363 | // class representative because we don't have an opportunity to actually do |
2364 | // path compression when dealing with immutability. This means that we |
2365 | // compress paths every time we do merges. It also means that we lose |
2366 | // the main amortized complexity benefit from the original data structure. |
2367 | ConstraintRangeTy Constraints = State->get<ConstraintRange>(); |
2368 | ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>(); |
2369 | |
2370 | // 1. If the merged classes have any constraints associated with them, we |
2371 | // need to transfer them to the class we have left. |
2372 | // |
2373 | // Intersection here makes perfect sense because both of these constraints |
2374 | // must hold for the whole new class. |
2375 | if (std::optional<RangeSet> NewClassConstraint = |
2376 | intersect(F&: RangeFactory, Head: getConstraint(State, Class: *this), |
2377 | Second: getConstraint(State, Class: Other))) { |
2378 | // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because |
2379 | // range inferrer shouldn't generate ranges incompatible with |
2380 | // equivalence classes. However, at the moment, due to imperfections |
2381 | // in the solver, it is possible and the merge function can also |
2382 | // return infeasible states aka null states. |
2383 | if (NewClassConstraint->isEmpty()) |
2384 | // Infeasible state |
2385 | return nullptr; |
2386 | |
2387 | // No need in tracking constraints of a now-dissolved class. |
2388 | Constraints = CRF.remove(Old: Constraints, K: Other); |
2389 | // Assign new constraints for this class. |
2390 | Constraints = CRF.add(Old: Constraints, K: *this, D: *NewClassConstraint); |
2391 | |
2392 | assert(areFeasible(Constraints) && "Constraint manager shouldn't produce " |
2393 | "a state with infeasible constraints" ); |
2394 | |
2395 | State = State->set<ConstraintRange>(Constraints); |
2396 | } |
2397 | |
2398 | // 2. Get ALL equivalence-related maps |
2399 | ClassMapTy Classes = State->get<ClassMap>(); |
2400 | ClassMapTy::Factory &CMF = State->get_context<ClassMap>(); |
2401 | |
2402 | ClassMembersTy Members = State->get<ClassMembers>(); |
2403 | ClassMembersTy::Factory &MF = State->get_context<ClassMembers>(); |
2404 | |
2405 | DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>(); |
2406 | DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>(); |
2407 | |
2408 | ClassSet::Factory &CF = State->get_context<ClassSet>(); |
2409 | SymbolSet::Factory &F = getMembersFactory(State); |
2410 | |
2411 | // 2. Merge members of the Other class into the current class. |
2412 | SymbolSet NewClassMembers = MyMembers; |
2413 | for (SymbolRef Sym : OtherMembers) { |
2414 | NewClassMembers = F.add(Old: NewClassMembers, V: Sym); |
2415 | // *this is now the class for all these new symbols. |
2416 | Classes = CMF.add(Old: Classes, K: Sym, D: *this); |
2417 | } |
2418 | |
2419 | // 3. Adjust member mapping. |
2420 | // |
2421 | // No need in tracking members of a now-dissolved class. |
2422 | Members = MF.remove(Old: Members, K: Other); |
2423 | // Now only the current class is mapped to all the symbols. |
2424 | Members = MF.add(Old: Members, K: *this, D: NewClassMembers); |
2425 | |
2426 | // 4. Update disequality relations |
2427 | ClassSet DisequalToOther = Other.getDisequalClasses(Map: DisequalityInfo, Factory&: CF); |
2428 | // We are about to merge two classes but they are already known to be |
2429 | // non-equal. This is a contradiction. |
2430 | if (DisequalToOther.contains(V: *this)) |
2431 | return nullptr; |
2432 | |
2433 | if (!DisequalToOther.isEmpty()) { |
2434 | ClassSet DisequalToThis = getDisequalClasses(Map: DisequalityInfo, Factory&: CF); |
2435 | DisequalityInfo = DF.remove(Old: DisequalityInfo, K: Other); |
2436 | |
2437 | for (EquivalenceClass DisequalClass : DisequalToOther) { |
2438 | DisequalToThis = CF.add(Old: DisequalToThis, V: DisequalClass); |
2439 | |
2440 | // Disequality is a symmetric relation meaning that if |
2441 | // DisequalToOther not null then the set for DisequalClass is not |
2442 | // empty and has at least Other. |
2443 | ClassSet OriginalSetLinkedToOther = |
2444 | *DisequalityInfo.lookup(K: DisequalClass); |
2445 | |
2446 | // Other will be eliminated and we should replace it with the bigger |
2447 | // united class. |
2448 | ClassSet NewSet = CF.remove(Old: OriginalSetLinkedToOther, V: Other); |
2449 | NewSet = CF.add(Old: NewSet, V: *this); |
2450 | |
2451 | DisequalityInfo = DF.add(Old: DisequalityInfo, K: DisequalClass, D: NewSet); |
2452 | } |
2453 | |
2454 | DisequalityInfo = DF.add(Old: DisequalityInfo, K: *this, D: DisequalToThis); |
2455 | State = State->set<DisequalityMap>(DisequalityInfo); |
2456 | } |
2457 | |
2458 | // 5. Update the state |
2459 | State = State->set<ClassMap>(Classes); |
2460 | State = State->set<ClassMembers>(Members); |
2461 | |
2462 | return State; |
2463 | } |
2464 | |
2465 | inline SymbolSet::Factory & |
2466 | EquivalenceClass::getMembersFactory(ProgramStateRef State) { |
2467 | return State->get_context<SymbolSet>(); |
2468 | } |
2469 | |
2470 | SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const { |
2471 | if (const SymbolSet *Members = State->get<ClassMembers>(key: *this)) |
2472 | return *Members; |
2473 | |
2474 | // This class is trivial, so we need to construct a set |
2475 | // with just that one symbol from the class. |
2476 | SymbolSet::Factory &F = getMembersFactory(State); |
2477 | return F.add(Old: F.getEmptySet(), V: getRepresentativeSymbol()); |
2478 | } |
2479 | |
2480 | bool EquivalenceClass::isTrivial(ProgramStateRef State) const { |
2481 | return State->get<ClassMembers>(key: *this) == nullptr; |
2482 | } |
2483 | |
2484 | bool EquivalenceClass::isTriviallyDead(ProgramStateRef State, |
2485 | SymbolReaper &Reaper) const { |
2486 | return isTrivial(State) && Reaper.isDead(sym: getRepresentativeSymbol()); |
2487 | } |
2488 | |
2489 | inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF, |
2490 | ProgramStateRef State, |
2491 | SymbolRef First, |
2492 | SymbolRef Second) { |
2493 | return markDisequal(F&: RF, State, First: find(State, Sym: First), Second: find(State, Sym: Second)); |
2494 | } |
2495 | |
2496 | inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF, |
2497 | ProgramStateRef State, |
2498 | EquivalenceClass First, |
2499 | EquivalenceClass Second) { |
2500 | return First.markDisequal(F&: RF, State, Other: Second); |
2501 | } |
2502 | |
2503 | inline ProgramStateRef |
2504 | EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State, |
2505 | EquivalenceClass Other) const { |
2506 | // If we know that two classes are equal, we can only produce an infeasible |
2507 | // state. |
2508 | if (*this == Other) { |
2509 | return nullptr; |
2510 | } |
2511 | |
2512 | DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>(); |
2513 | ConstraintRangeTy Constraints = State->get<ConstraintRange>(); |
2514 | |
2515 | // Disequality is a symmetric relation, so if we mark A as disequal to B, |
2516 | // we should also mark B as disequalt to A. |
2517 | if (!addToDisequalityInfo(Info&: DisequalityInfo, Constraints, F&: RF, State, First: *this, |
2518 | Second: Other) || |
2519 | !addToDisequalityInfo(Info&: DisequalityInfo, Constraints, F&: RF, State, First: Other, |
2520 | Second: *this)) |
2521 | return nullptr; |
2522 | |
2523 | assert(areFeasible(Constraints) && "Constraint manager shouldn't produce " |
2524 | "a state with infeasible constraints" ); |
2525 | |
2526 | State = State->set<DisequalityMap>(DisequalityInfo); |
2527 | State = State->set<ConstraintRange>(Constraints); |
2528 | |
2529 | return State; |
2530 | } |
2531 | |
2532 | inline bool EquivalenceClass::addToDisequalityInfo( |
2533 | DisequalityMapTy &Info, ConstraintRangeTy &Constraints, |
2534 | RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First, |
2535 | EquivalenceClass Second) { |
2536 | |
2537 | // 1. Get all of the required factories. |
2538 | DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>(); |
2539 | ClassSet::Factory &CF = State->get_context<ClassSet>(); |
2540 | ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>(); |
2541 | |
2542 | // 2. Add Second to the set of classes disequal to First. |
2543 | const ClassSet *CurrentSet = Info.lookup(K: First); |
2544 | ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet(); |
2545 | NewSet = CF.add(Old: NewSet, V: Second); |
2546 | |
2547 | Info = F.add(Old: Info, K: First, D: NewSet); |
2548 | |
2549 | // 3. If Second is known to be a constant, we can delete this point |
2550 | // from the constraint asociated with First. |
2551 | // |
2552 | // So, if Second == 10, it means that First != 10. |
2553 | // At the same time, the same logic does not apply to ranges. |
2554 | if (const RangeSet *SecondConstraint = Constraints.lookup(K: Second)) |
2555 | if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) { |
2556 | |
2557 | RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange( |
2558 | F&: RF, State, Origin: First.getRepresentativeSymbol()); |
2559 | |
2560 | FirstConstraint = RF.deletePoint(From: FirstConstraint, Point: *Point); |
2561 | |
2562 | // If the First class is about to be constrained with an empty |
2563 | // range-set, the state is infeasible. |
2564 | if (FirstConstraint.isEmpty()) |
2565 | return false; |
2566 | |
2567 | Constraints = CRF.add(Old: Constraints, K: First, D: FirstConstraint); |
2568 | } |
2569 | |
2570 | return true; |
2571 | } |
2572 | |
2573 | inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State, |
2574 | SymbolRef FirstSym, |
2575 | SymbolRef SecondSym) { |
2576 | return EquivalenceClass::areEqual(State, First: find(State, Sym: FirstSym), |
2577 | Second: find(State, Sym: SecondSym)); |
2578 | } |
2579 | |
2580 | inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State, |
2581 | EquivalenceClass First, |
2582 | EquivalenceClass Second) { |
2583 | // The same equivalence class => symbols are equal. |
2584 | if (First == Second) |
2585 | return true; |
2586 | |
2587 | // Let's check if we know anything about these two classes being not equal to |
2588 | // each other. |
2589 | ClassSet DisequalToFirst = First.getDisequalClasses(State); |
2590 | if (DisequalToFirst.contains(V: Second)) |
2591 | return false; |
2592 | |
2593 | // It is not clear. |
2594 | return std::nullopt; |
2595 | } |
2596 | |
2597 | [[nodiscard]] ProgramStateRef |
2598 | EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { |
2599 | |
2600 | SymbolSet ClsMembers = getClassMembers(State); |
2601 | assert(ClsMembers.contains(Old)); |
2602 | |
2603 | // Remove `Old`'s Class->Sym relation. |
2604 | SymbolSet::Factory &F = getMembersFactory(State); |
2605 | ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>(); |
2606 | ClsMembers = F.remove(Old: ClsMembers, V: Old); |
2607 | // Ensure another precondition of the removeMember function (we can check |
2608 | // this only with isEmpty, thus we have to do the remove first). |
2609 | assert(!ClsMembers.isEmpty() && |
2610 | "Class should have had at least two members before member removal" ); |
2611 | // Overwrite the existing members assigned to this class. |
2612 | ClassMembersTy ClassMembersMap = State->get<ClassMembers>(); |
2613 | ClassMembersMap = EMFactory.add(Old: ClassMembersMap, K: *this, D: ClsMembers); |
2614 | State = State->set<ClassMembers>(ClassMembersMap); |
2615 | |
2616 | // Remove `Old`'s Sym->Class relation. |
2617 | ClassMapTy Classes = State->get<ClassMap>(); |
2618 | ClassMapTy::Factory &CMF = State->get_context<ClassMap>(); |
2619 | Classes = CMF.remove(Old: Classes, K: Old); |
2620 | State = State->set<ClassMap>(Classes); |
2621 | |
2622 | return State; |
2623 | } |
2624 | |
2625 | // Re-evaluate an SVal with top-level `State->assume` logic. |
2626 | [[nodiscard]] ProgramStateRef |
2627 | reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) { |
2628 | if (!Constraint) |
2629 | return State; |
2630 | |
2631 | const auto DefinedVal = TheValue.castAs<DefinedSVal>(); |
2632 | |
2633 | // If the SVal is 0, we can simply interpret that as `false`. |
2634 | if (Constraint->encodesFalseRange()) |
2635 | return State->assume(Cond: DefinedVal, Assumption: false); |
2636 | |
2637 | // If the constraint does not encode 0 then we can interpret that as `true` |
2638 | // AND as a Range(Set). |
2639 | if (Constraint->encodesTrueRange()) { |
2640 | State = State->assume(Cond: DefinedVal, Assumption: true); |
2641 | if (!State) |
2642 | return nullptr; |
2643 | // Fall through, re-assume based on the range values as well. |
2644 | } |
2645 | // Overestimate the individual Ranges with the RangeSet' lowest and |
2646 | // highest values. |
2647 | return State->assumeInclusiveRange(Val: DefinedVal, From: Constraint->getMinValue(), |
2648 | To: Constraint->getMaxValue(), Assumption: true); |
2649 | } |
2650 | |
2651 | // Iterate over all symbols and try to simplify them. Once a symbol is |
2652 | // simplified then we check if we can merge the simplified symbol's equivalence |
2653 | // class to this class. This way, we simplify not just the symbols but the |
2654 | // classes as well: we strive to keep the number of the classes to be the |
2655 | // absolute minimum. |
2656 | [[nodiscard]] ProgramStateRef |
2657 | EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, |
2658 | ProgramStateRef State, EquivalenceClass Class) { |
2659 | SymbolSet ClassMembers = Class.getClassMembers(State); |
2660 | for (const SymbolRef &MemberSym : ClassMembers) { |
2661 | |
2662 | const SVal SimplifiedMemberVal = simplifyToSVal(State, Sym: MemberSym); |
2663 | const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol(); |
2664 | |
2665 | // The symbol is collapsed to a constant, check if the current State is |
2666 | // still feasible. |
2667 | if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) { |
2668 | const llvm::APSInt &SV = CI->getValue(); |
2669 | const RangeSet *ClassConstraint = getConstraint(State, Class); |
2670 | // We have found a contradiction. |
2671 | if (ClassConstraint && !ClassConstraint->contains(Point: SV)) |
2672 | return nullptr; |
2673 | } |
2674 | |
2675 | if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) { |
2676 | // The simplified symbol should be the member of the original Class, |
2677 | // however, it might be in another existing class at the moment. We |
2678 | // have to merge these classes. |
2679 | ProgramStateRef OldState = State; |
2680 | State = merge(F, State, First: MemberSym, Second: SimplifiedMemberSym); |
2681 | if (!State) |
2682 | return nullptr; |
2683 | // No state change, no merge happened actually. |
2684 | if (OldState == State) |
2685 | continue; |
2686 | |
2687 | // Be aware that `SimplifiedMemberSym` might refer to an already dead |
2688 | // symbol. In that case, the eqclass of that might not be the same as the |
2689 | // eqclass of `MemberSym`. This is because the dead symbols are not |
2690 | // preserved in the `ClassMap`, hence |
2691 | // `find(State, SimplifiedMemberSym)` will result in a trivial eqclass |
2692 | // compared to the eqclass of `MemberSym`. |
2693 | // These eqclasses should be the same if `SimplifiedMemberSym` is alive. |
2694 | // --> assert(find(State, MemberSym) == find(State, SimplifiedMemberSym)) |
2695 | // |
2696 | // Note that `MemberSym` must be alive here since that is from the |
2697 | // `ClassMembers` where all the symbols are alive. |
2698 | |
2699 | // Remove the old and more complex symbol. |
2700 | State = find(State, Sym: MemberSym).removeMember(State, Old: MemberSym); |
2701 | |
2702 | // Query the class constraint again b/c that may have changed during the |
2703 | // merge above. |
2704 | const RangeSet *ClassConstraint = getConstraint(State, Class); |
2705 | |
2706 | // Re-evaluate an SVal with top-level `State->assume`, this ignites |
2707 | // a RECURSIVE algorithm that will reach a FIXPOINT. |
2708 | // |
2709 | // About performance and complexity: Let us assume that in a State we |
2710 | // have N non-trivial equivalence classes and that all constraints and |
2711 | // disequality info is related to non-trivial classes. In the worst case, |
2712 | // we can simplify only one symbol of one class in each iteration. The |
2713 | // number of symbols in one class cannot grow b/c we replace the old |
2714 | // symbol with the simplified one. Also, the number of the equivalence |
2715 | // classes can decrease only, b/c the algorithm does a merge operation |
2716 | // optionally. We need N iterations in this case to reach the fixpoint. |
2717 | // Thus, the steps needed to be done in the worst case is proportional to |
2718 | // N*N. |
2719 | // |
2720 | // This worst case scenario can be extended to that case when we have |
2721 | // trivial classes in the constraints and in the disequality map. This |
2722 | // case can be reduced to the case with a State where there are only |
2723 | // non-trivial classes. This is because a merge operation on two trivial |
2724 | // classes results in one non-trivial class. |
2725 | State = reAssume(State, Constraint: ClassConstraint, TheValue: SimplifiedMemberVal); |
2726 | if (!State) |
2727 | return nullptr; |
2728 | } |
2729 | } |
2730 | return State; |
2731 | } |
2732 | |
2733 | inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State, |
2734 | SymbolRef Sym) { |
2735 | return find(State, Sym).getDisequalClasses(State); |
2736 | } |
2737 | |
2738 | inline ClassSet |
2739 | EquivalenceClass::getDisequalClasses(ProgramStateRef State) const { |
2740 | return getDisequalClasses(Map: State->get<DisequalityMap>(), |
2741 | Factory&: State->get_context<ClassSet>()); |
2742 | } |
2743 | |
2744 | inline ClassSet |
2745 | EquivalenceClass::getDisequalClasses(DisequalityMapTy Map, |
2746 | ClassSet::Factory &Factory) const { |
2747 | if (const ClassSet *DisequalClasses = Map.lookup(K: *this)) |
2748 | return *DisequalClasses; |
2749 | |
2750 | return Factory.getEmptySet(); |
2751 | } |
2752 | |
2753 | bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) { |
2754 | ClassMembersTy Members = State->get<ClassMembers>(); |
2755 | |
2756 | for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) { |
2757 | for (SymbolRef Member : ClassMembersPair.second) { |
2758 | // Every member of the class should have a mapping back to the class. |
2759 | if (find(State, Sym: Member) == ClassMembersPair.first) { |
2760 | continue; |
2761 | } |
2762 | |
2763 | return false; |
2764 | } |
2765 | } |
2766 | |
2767 | DisequalityMapTy Disequalities = State->get<DisequalityMap>(); |
2768 | for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) { |
2769 | EquivalenceClass Class = DisequalityInfo.first; |
2770 | ClassSet DisequalClasses = DisequalityInfo.second; |
2771 | |
2772 | // There is no use in keeping empty sets in the map. |
2773 | if (DisequalClasses.isEmpty()) |
2774 | return false; |
2775 | |
2776 | // Disequality is symmetrical, i.e. for every Class A and B that A != B, |
2777 | // B != A should also be true. |
2778 | for (EquivalenceClass DisequalClass : DisequalClasses) { |
2779 | const ClassSet *DisequalToDisequalClasses = |
2780 | Disequalities.lookup(K: DisequalClass); |
2781 | |
2782 | // It should be a set of at least one element: Class |
2783 | if (!DisequalToDisequalClasses || |
2784 | !DisequalToDisequalClasses->contains(V: Class)) |
2785 | return false; |
2786 | } |
2787 | } |
2788 | |
2789 | return true; |
2790 | } |
2791 | |
2792 | //===----------------------------------------------------------------------===// |
2793 | // RangeConstraintManager implementation |
2794 | //===----------------------------------------------------------------------===// |
2795 | |
2796 | bool RangeConstraintManager::canReasonAbout(SVal X) const { |
2797 | std::optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>(); |
2798 | if (SymVal && SymVal->isExpression()) { |
2799 | const SymExpr *SE = SymVal->getSymbol(); |
2800 | |
2801 | if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(Val: SE)) { |
2802 | switch (SIE->getOpcode()) { |
2803 | // We don't reason yet about bitwise-constraints on symbolic values. |
2804 | case BO_And: |
2805 | case BO_Or: |
2806 | case BO_Xor: |
2807 | return false; |
2808 | // We don't reason yet about these arithmetic constraints on |
2809 | // symbolic values. |
2810 | case BO_Mul: |
2811 | case BO_Div: |
2812 | case BO_Rem: |
2813 | case BO_Shl: |
2814 | case BO_Shr: |
2815 | return false; |
2816 | // All other cases. |
2817 | default: |
2818 | return true; |
2819 | } |
2820 | } |
2821 | |
2822 | if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(Val: SE)) { |
2823 | // FIXME: Handle <=> here. |
2824 | if (BinaryOperator::isEqualityOp(SSE->getOpcode()) || |
2825 | BinaryOperator::isRelationalOp(SSE->getOpcode())) { |
2826 | // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc. |
2827 | // We've recently started producing Loc <> NonLoc comparisons (that |
2828 | // result from casts of one of the operands between eg. intptr_t and |
2829 | // void *), but we can't reason about them yet. |
2830 | if (Loc::isLocType(T: SSE->getLHS()->getType())) { |
2831 | return Loc::isLocType(T: SSE->getRHS()->getType()); |
2832 | } |
2833 | } |
2834 | } |
2835 | |
2836 | return false; |
2837 | } |
2838 | |
2839 | return true; |
2840 | } |
2841 | |
2842 | ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, |
2843 | SymbolRef Sym) { |
2844 | const RangeSet *Ranges = getConstraint(State, Sym); |
2845 | |
2846 | // If we don't have any information about this symbol, it's underconstrained. |
2847 | if (!Ranges) |
2848 | return ConditionTruthVal(); |
2849 | |
2850 | // If we have a concrete value, see if it's zero. |
2851 | if (const llvm::APSInt *Value = Ranges->getConcreteValue()) |
2852 | return *Value == 0; |
2853 | |
2854 | BasicValueFactory &BV = getBasicVals(); |
2855 | APSIntType IntType = BV.getAPSIntType(T: Sym->getType()); |
2856 | llvm::APSInt Zero = IntType.getZeroValue(); |
2857 | |
2858 | // Check if zero is in the set of possible values. |
2859 | if (!Ranges->contains(Point: Zero)) |
2860 | return false; |
2861 | |
2862 | // Zero is a possible value, but it is not the /only/ possible value. |
2863 | return ConditionTruthVal(); |
2864 | } |
2865 | |
2866 | const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, |
2867 | SymbolRef Sym) const { |
2868 | const RangeSet *T = getConstraint(State: St, Sym); |
2869 | return T ? T->getConcreteValue() : nullptr; |
2870 | } |
2871 | |
2872 | const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, |
2873 | SymbolRef Sym) const { |
2874 | const RangeSet *T = getConstraint(State: St, Sym); |
2875 | if (!T || T->isEmpty()) |
2876 | return nullptr; |
2877 | return &T->getMinValue(); |
2878 | } |
2879 | |
2880 | const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, |
2881 | SymbolRef Sym) const { |
2882 | const RangeSet *T = getConstraint(State: St, Sym); |
2883 | if (!T || T->isEmpty()) |
2884 | return nullptr; |
2885 | return &T->getMaxValue(); |
2886 | } |
2887 | |
2888 | //===----------------------------------------------------------------------===// |
2889 | // Remove dead symbols from existing constraints |
2890 | //===----------------------------------------------------------------------===// |
2891 | |
2892 | /// Scan all symbols referenced by the constraints. If the symbol is not alive |
2893 | /// as marked in LSymbols, mark it as dead in DSymbols. |
2894 | ProgramStateRef |
2895 | RangeConstraintManager::removeDeadBindings(ProgramStateRef State, |
2896 | SymbolReaper &SymReaper) { |
2897 | ClassMembersTy ClassMembersMap = State->get<ClassMembers>(); |
2898 | ClassMembersTy NewClassMembersMap = ClassMembersMap; |
2899 | ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>(); |
2900 | SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>(); |
2901 | |
2902 | ConstraintRangeTy Constraints = State->get<ConstraintRange>(); |
2903 | ConstraintRangeTy NewConstraints = Constraints; |
2904 | ConstraintRangeTy::Factory &ConstraintFactory = |
2905 | State->get_context<ConstraintRange>(); |
2906 | |
2907 | ClassMapTy Map = State->get<ClassMap>(); |
2908 | ClassMapTy NewMap = Map; |
2909 | ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>(); |
2910 | |
2911 | DisequalityMapTy Disequalities = State->get<DisequalityMap>(); |
2912 | DisequalityMapTy::Factory &DisequalityFactory = |
2913 | State->get_context<DisequalityMap>(); |
2914 | ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>(); |
2915 | |
2916 | bool ClassMapChanged = false; |
2917 | bool MembersMapChanged = false; |
2918 | bool ConstraintMapChanged = false; |
2919 | bool DisequalitiesChanged = false; |
2920 | |
2921 | auto removeDeadClass = [&](EquivalenceClass Class) { |
2922 | // Remove associated constraint ranges. |
2923 | Constraints = ConstraintFactory.remove(Old: Constraints, K: Class); |
2924 | ConstraintMapChanged = true; |
2925 | |
2926 | // Update disequality information to not hold any information on the |
2927 | // removed class. |
2928 | ClassSet DisequalClasses = |
2929 | Class.getDisequalClasses(Map: Disequalities, Factory&: ClassSetFactory); |
2930 | if (!DisequalClasses.isEmpty()) { |
2931 | for (EquivalenceClass DisequalClass : DisequalClasses) { |
2932 | ClassSet DisequalToDisequalSet = |
2933 | DisequalClass.getDisequalClasses(Map: Disequalities, Factory&: ClassSetFactory); |
2934 | // DisequalToDisequalSet is guaranteed to be non-empty for consistent |
2935 | // disequality info. |
2936 | assert(!DisequalToDisequalSet.isEmpty()); |
2937 | ClassSet NewSet = ClassSetFactory.remove(Old: DisequalToDisequalSet, V: Class); |
2938 | |
2939 | // No need in keeping an empty set. |
2940 | if (NewSet.isEmpty()) { |
2941 | Disequalities = |
2942 | DisequalityFactory.remove(Old: Disequalities, K: DisequalClass); |
2943 | } else { |
2944 | Disequalities = |
2945 | DisequalityFactory.add(Old: Disequalities, K: DisequalClass, D: NewSet); |
2946 | } |
2947 | } |
2948 | // Remove the data for the class |
2949 | Disequalities = DisequalityFactory.remove(Old: Disequalities, K: Class); |
2950 | DisequalitiesChanged = true; |
2951 | } |
2952 | }; |
2953 | |
2954 | // 1. Let's see if dead symbols are trivial and have associated constraints. |
2955 | for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair : |
2956 | Constraints) { |
2957 | EquivalenceClass Class = ClassConstraintPair.first; |
2958 | if (Class.isTriviallyDead(State, Reaper&: SymReaper)) { |
2959 | // If this class is trivial, we can remove its constraints right away. |
2960 | removeDeadClass(Class); |
2961 | } |
2962 | } |
2963 | |
2964 | // 2. We don't need to track classes for dead symbols. |
2965 | for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) { |
2966 | SymbolRef Sym = SymbolClassPair.first; |
2967 | |
2968 | if (SymReaper.isDead(sym: Sym)) { |
2969 | ClassMapChanged = true; |
2970 | NewMap = ClassFactory.remove(Old: NewMap, K: Sym); |
2971 | } |
2972 | } |
2973 | |
2974 | // 3. Remove dead members from classes and remove dead non-trivial classes |
2975 | // and their constraints. |
2976 | for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : |
2977 | ClassMembersMap) { |
2978 | EquivalenceClass Class = ClassMembersPair.first; |
2979 | SymbolSet LiveMembers = ClassMembersPair.second; |
2980 | bool MembersChanged = false; |
2981 | |
2982 | for (SymbolRef Member : ClassMembersPair.second) { |
2983 | if (SymReaper.isDead(sym: Member)) { |
2984 | MembersChanged = true; |
2985 | LiveMembers = SetFactory.remove(Old: LiveMembers, V: Member); |
2986 | } |
2987 | } |
2988 | |
2989 | // Check if the class changed. |
2990 | if (!MembersChanged) |
2991 | continue; |
2992 | |
2993 | MembersMapChanged = true; |
2994 | |
2995 | if (LiveMembers.isEmpty()) { |
2996 | // The class is dead now, we need to wipe it out of the members map... |
2997 | NewClassMembersMap = EMFactory.remove(Old: NewClassMembersMap, K: Class); |
2998 | |
2999 | // ...and remove all of its constraints. |
3000 | removeDeadClass(Class); |
3001 | } else { |
3002 | // We need to change the members associated with the class. |
3003 | NewClassMembersMap = |
3004 | EMFactory.add(Old: NewClassMembersMap, K: Class, D: LiveMembers); |
3005 | } |
3006 | } |
3007 | |
3008 | // 4. Update the state with new maps. |
3009 | // |
3010 | // Here we try to be humble and update a map only if it really changed. |
3011 | if (ClassMapChanged) |
3012 | State = State->set<ClassMap>(NewMap); |
3013 | |
3014 | if (MembersMapChanged) |
3015 | State = State->set<ClassMembers>(NewClassMembersMap); |
3016 | |
3017 | if (ConstraintMapChanged) |
3018 | State = State->set<ConstraintRange>(Constraints); |
3019 | |
3020 | if (DisequalitiesChanged) |
3021 | State = State->set<DisequalityMap>(Disequalities); |
3022 | |
3023 | assert(EquivalenceClass::isClassDataConsistent(State)); |
3024 | |
3025 | return State; |
3026 | } |
3027 | |
3028 | RangeSet RangeConstraintManager::getRange(ProgramStateRef State, |
3029 | SymbolRef Sym) { |
3030 | return SymbolicRangeInferrer::inferRange(F, State, Origin: Sym); |
3031 | } |
3032 | |
3033 | ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State, |
3034 | SymbolRef Sym, |
3035 | RangeSet Range) { |
3036 | return ConstraintAssignor::assign(State, Builder&: getSValBuilder(), F, CoS: Sym, NewConstraint: Range); |
3037 | } |
3038 | |
3039 | //===------------------------------------------------------------------------=== |
3040 | // assumeSymX methods: protected interface for RangeConstraintManager. |
3041 | //===------------------------------------------------------------------------=== |
3042 | |
3043 | // The syntax for ranges below is mathematical, using [x, y] for closed ranges |
3044 | // and (x, y) for open ranges. These ranges are modular, corresponding with |
3045 | // a common treatment of C integer overflow. This means that these methods |
3046 | // do not have to worry about overflow; RangeSet::Intersect can handle such a |
3047 | // "wraparound" range. |
3048 | // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1, |
3049 | // UINT_MAX, 0, 1, and 2. |
3050 | |
3051 | ProgramStateRef |
3052 | RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym, |
3053 | const llvm::APSInt &Int, |
3054 | const llvm::APSInt &Adjustment) { |
3055 | // Before we do any real work, see if the value can even show up. |
3056 | APSIntType AdjustmentType(Adjustment); |
3057 | if (AdjustmentType.testInRange(Val: Int, AllowMixedSign: true) != APSIntType::RTR_Within) |
3058 | return St; |
3059 | |
3060 | llvm::APSInt Point = AdjustmentType.convert(Value: Int) - Adjustment; |
3061 | RangeSet New = getRange(State: St, Sym); |
3062 | New = F.deletePoint(From: New, Point); |
3063 | |
3064 | return setRange(State: St, Sym, Range: New); |
3065 | } |
3066 | |
3067 | ProgramStateRef |
3068 | RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym, |
3069 | const llvm::APSInt &Int, |
3070 | const llvm::APSInt &Adjustment) { |
3071 | // Before we do any real work, see if the value can even show up. |
3072 | APSIntType AdjustmentType(Adjustment); |
3073 | if (AdjustmentType.testInRange(Val: Int, AllowMixedSign: true) != APSIntType::RTR_Within) |
3074 | return nullptr; |
3075 | |
3076 | // [Int-Adjustment, Int-Adjustment] |
3077 | llvm::APSInt AdjInt = AdjustmentType.convert(Value: Int) - Adjustment; |
3078 | RangeSet New = getRange(State: St, Sym); |
3079 | New = F.intersect(LHS: New, Point: AdjInt); |
3080 | |
3081 | return setRange(State: St, Sym, Range: New); |
3082 | } |
3083 | |
3084 | RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St, |
3085 | SymbolRef Sym, |
3086 | const llvm::APSInt &Int, |
3087 | const llvm::APSInt &Adjustment) { |
3088 | // Before we do any real work, see if the value can even show up. |
3089 | APSIntType AdjustmentType(Adjustment); |
3090 | switch (AdjustmentType.testInRange(Val: Int, AllowMixedSign: true)) { |
3091 | case APSIntType::RTR_Below: |
3092 | return F.getEmptySet(); |
3093 | case APSIntType::RTR_Within: |
3094 | break; |
3095 | case APSIntType::RTR_Above: |
3096 | return getRange(State: St, Sym); |
3097 | } |
3098 | |
3099 | // Special case for Int == Min. This is always false. |
3100 | llvm::APSInt ComparisonVal = AdjustmentType.convert(Value: Int); |
3101 | llvm::APSInt Min = AdjustmentType.getMinValue(); |
3102 | if (ComparisonVal == Min) |
3103 | return F.getEmptySet(); |
3104 | |
3105 | llvm::APSInt Lower = Min - Adjustment; |
3106 | llvm::APSInt Upper = ComparisonVal - Adjustment; |
3107 | --Upper; |
3108 | |
3109 | RangeSet Result = getRange(State: St, Sym); |
3110 | return F.intersect(What: Result, Lower, Upper); |
3111 | } |
3112 | |
3113 | ProgramStateRef |
3114 | RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym, |
3115 | const llvm::APSInt &Int, |
3116 | const llvm::APSInt &Adjustment) { |
3117 | RangeSet New = getSymLTRange(St, Sym, Int, Adjustment); |
3118 | return setRange(State: St, Sym, Range: New); |
3119 | } |
3120 | |
3121 | RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St, |
3122 | SymbolRef Sym, |
3123 | const llvm::APSInt &Int, |
3124 | const llvm::APSInt &Adjustment) { |
3125 | // Before we do any real work, see if the value can even show up. |
3126 | APSIntType AdjustmentType(Adjustment); |
3127 | switch (AdjustmentType.testInRange(Val: Int, AllowMixedSign: true)) { |
3128 | case APSIntType::RTR_Below: |
3129 | return getRange(State: St, Sym); |
3130 | case APSIntType::RTR_Within: |
3131 | break; |
3132 | case APSIntType::RTR_Above: |
3133 | return F.getEmptySet(); |
3134 | } |
3135 | |
3136 | // Special case for Int == Max. This is always false. |
3137 | llvm::APSInt ComparisonVal = AdjustmentType.convert(Value: Int); |
3138 | llvm::APSInt Max = AdjustmentType.getMaxValue(); |
3139 | if (ComparisonVal == Max) |
3140 | return F.getEmptySet(); |
3141 | |
3142 | llvm::APSInt Lower = ComparisonVal - Adjustment; |
3143 | llvm::APSInt Upper = Max - Adjustment; |
3144 | ++Lower; |
3145 | |
3146 | RangeSet SymRange = getRange(State: St, Sym); |
3147 | return F.intersect(What: SymRange, Lower, Upper); |
3148 | } |
3149 | |
3150 | ProgramStateRef |
3151 | RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym, |
3152 | const llvm::APSInt &Int, |
3153 | const llvm::APSInt &Adjustment) { |
3154 | RangeSet New = getSymGTRange(St, Sym, Int, Adjustment); |
3155 | return setRange(State: St, Sym, Range: New); |
3156 | } |
3157 | |
3158 | RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St, |
3159 | SymbolRef Sym, |
3160 | const llvm::APSInt &Int, |
3161 | const llvm::APSInt &Adjustment) { |
3162 | // Before we do any real work, see if the value can even show up. |
3163 | APSIntType AdjustmentType(Adjustment); |
3164 | switch (AdjustmentType.testInRange(Val: Int, AllowMixedSign: true)) { |
3165 | case APSIntType::RTR_Below: |
3166 | return getRange(State: St, Sym); |
3167 | case APSIntType::RTR_Within: |
3168 | break; |
3169 | case APSIntType::RTR_Above: |
3170 | return F.getEmptySet(); |
3171 | } |
3172 | |
3173 | // Special case for Int == Min. This is always feasible. |
3174 | llvm::APSInt ComparisonVal = AdjustmentType.convert(Value: Int); |
3175 | llvm::APSInt Min = AdjustmentType.getMinValue(); |
3176 | if (ComparisonVal == Min) |
3177 | return getRange(State: St, Sym); |
3178 | |
3179 | llvm::APSInt Max = AdjustmentType.getMaxValue(); |
3180 | llvm::APSInt Lower = ComparisonVal - Adjustment; |
3181 | llvm::APSInt Upper = Max - Adjustment; |
3182 | |
3183 | RangeSet SymRange = getRange(State: St, Sym); |
3184 | return F.intersect(What: SymRange, Lower, Upper); |
3185 | } |
3186 | |
3187 | ProgramStateRef |
3188 | RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym, |
3189 | const llvm::APSInt &Int, |
3190 | const llvm::APSInt &Adjustment) { |
3191 | RangeSet New = getSymGERange(St, Sym, Int, Adjustment); |
3192 | return setRange(State: St, Sym, Range: New); |
3193 | } |
3194 | |
3195 | RangeSet |
3196 | RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS, |
3197 | const llvm::APSInt &Int, |
3198 | const llvm::APSInt &Adjustment) { |
3199 | // Before we do any real work, see if the value can even show up. |
3200 | APSIntType AdjustmentType(Adjustment); |
3201 | switch (AdjustmentType.testInRange(Val: Int, AllowMixedSign: true)) { |
3202 | case APSIntType::RTR_Below: |
3203 | return F.getEmptySet(); |
3204 | case APSIntType::RTR_Within: |
3205 | break; |
3206 | case APSIntType::RTR_Above: |
3207 | return RS(); |
3208 | } |
3209 | |
3210 | // Special case for Int == Max. This is always feasible. |
3211 | llvm::APSInt ComparisonVal = AdjustmentType.convert(Value: Int); |
3212 | llvm::APSInt Max = AdjustmentType.getMaxValue(); |
3213 | if (ComparisonVal == Max) |
3214 | return RS(); |
3215 | |
3216 | llvm::APSInt Min = AdjustmentType.getMinValue(); |
3217 | llvm::APSInt Lower = Min - Adjustment; |
3218 | llvm::APSInt Upper = ComparisonVal - Adjustment; |
3219 | |
3220 | RangeSet Default = RS(); |
3221 | return F.intersect(What: Default, Lower, Upper); |
3222 | } |
3223 | |
3224 | RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St, |
3225 | SymbolRef Sym, |
3226 | const llvm::APSInt &Int, |
3227 | const llvm::APSInt &Adjustment) { |
3228 | return getSymLERange(RS: [&] { return getRange(State: St, Sym); }, Int, Adjustment); |
3229 | } |
3230 | |
3231 | ProgramStateRef |
3232 | RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym, |
3233 | const llvm::APSInt &Int, |
3234 | const llvm::APSInt &Adjustment) { |
3235 | RangeSet New = getSymLERange(St, Sym, Int, Adjustment); |
3236 | return setRange(State: St, Sym, Range: New); |
3237 | } |
3238 | |
3239 | ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange( |
3240 | ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, |
3241 | const llvm::APSInt &To, const llvm::APSInt &Adjustment) { |
3242 | RangeSet New = getSymGERange(St: State, Sym, Int: From, Adjustment); |
3243 | if (New.isEmpty()) |
3244 | return nullptr; |
3245 | RangeSet Out = getSymLERange(RS: [&] { return New; }, Int: To, Adjustment); |
3246 | return setRange(State, Sym, Range: Out); |
3247 | } |
3248 | |
3249 | ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange( |
3250 | ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, |
3251 | const llvm::APSInt &To, const llvm::APSInt &Adjustment) { |
3252 | RangeSet RangeLT = getSymLTRange(St: State, Sym, Int: From, Adjustment); |
3253 | RangeSet RangeGT = getSymGTRange(St: State, Sym, Int: To, Adjustment); |
3254 | RangeSet New(F.add(LHS: RangeLT, RHS: RangeGT)); |
3255 | return setRange(State, Sym, Range: New); |
3256 | } |
3257 | |
3258 | //===----------------------------------------------------------------------===// |
3259 | // Pretty-printing. |
3260 | //===----------------------------------------------------------------------===// |
3261 | |
3262 | void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State, |
3263 | const char *NL, unsigned int Space, |
3264 | bool IsDot) const { |
3265 | printConstraints(Out, State, NL, Space, IsDot); |
3266 | printEquivalenceClasses(Out, State, NL, Space, IsDot); |
3267 | printDisequalities(Out, State, NL, Space, IsDot); |
3268 | } |
3269 | |
3270 | void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State, |
3271 | SymbolRef Sym) { |
3272 | const RangeSet RS = getRange(State, Sym); |
3273 | if (RS.isEmpty()) { |
3274 | Out << "<empty rangeset>" ; |
3275 | return; |
3276 | } |
3277 | Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:" ); |
3278 | RS.dump(OS&: Out); |
3279 | } |
3280 | |
3281 | static std::string toString(const SymbolRef &Sym) { |
3282 | std::string S; |
3283 | llvm::raw_string_ostream O(S); |
3284 | Sym->dumpToStream(os&: O); |
3285 | return O.str(); |
3286 | } |
3287 | |
3288 | void RangeConstraintManager::printConstraints(raw_ostream &Out, |
3289 | ProgramStateRef State, |
3290 | const char *NL, |
3291 | unsigned int Space, |
3292 | bool IsDot) const { |
3293 | ConstraintRangeTy Constraints = State->get<ConstraintRange>(); |
3294 | |
3295 | Indent(Out, Space, IsDot) << "\"constraints\": " ; |
3296 | if (Constraints.isEmpty()) { |
3297 | Out << "null," << NL; |
3298 | return; |
3299 | } |
3300 | |
3301 | std::map<std::string, RangeSet> OrderedConstraints; |
3302 | for (std::pair<EquivalenceClass, RangeSet> P : Constraints) { |
3303 | SymbolSet ClassMembers = P.first.getClassMembers(State); |
3304 | for (const SymbolRef &ClassMember : ClassMembers) { |
3305 | bool insertion_took_place; |
3306 | std::tie(args: std::ignore, args&: insertion_took_place) = |
3307 | OrderedConstraints.insert(x: {toString(Sym: ClassMember), P.second}); |
3308 | assert(insertion_took_place && |
3309 | "two symbols should not have the same dump" ); |
3310 | } |
3311 | } |
3312 | |
3313 | ++Space; |
3314 | Out << '[' << NL; |
3315 | bool First = true; |
3316 | for (std::pair<std::string, RangeSet> P : OrderedConstraints) { |
3317 | if (First) { |
3318 | First = false; |
3319 | } else { |
3320 | Out << ','; |
3321 | Out << NL; |
3322 | } |
3323 | Indent(Out, Space, IsDot) |
3324 | << "{ \"symbol\": \"" << P.first << "\", \"range\": \"" ; |
3325 | P.second.dump(OS&: Out); |
3326 | Out << "\" }" ; |
3327 | } |
3328 | Out << NL; |
3329 | |
3330 | --Space; |
3331 | Indent(Out, Space, IsDot) << "]," << NL; |
3332 | } |
3333 | |
3334 | static std::string toString(ProgramStateRef State, EquivalenceClass Class) { |
3335 | SymbolSet ClassMembers = Class.getClassMembers(State); |
3336 | llvm::SmallVector<SymbolRef, 8> (ClassMembers.begin(), |
3337 | ClassMembers.end()); |
3338 | llvm::sort(C&: ClassMembersSorted, |
3339 | Comp: [](const SymbolRef &LHS, const SymbolRef &RHS) { |
3340 | return toString(Sym: LHS) < toString(Sym: RHS); |
3341 | }); |
3342 | |
3343 | bool FirstMember = true; |
3344 | |
3345 | std::string Str; |
3346 | llvm::raw_string_ostream Out(Str); |
3347 | Out << "[ " ; |
3348 | for (SymbolRef ClassMember : ClassMembersSorted) { |
3349 | if (FirstMember) |
3350 | FirstMember = false; |
3351 | else |
3352 | Out << ", " ; |
3353 | Out << "\"" << ClassMember << "\"" ; |
3354 | } |
3355 | Out << " ]" ; |
3356 | return Out.str(); |
3357 | } |
3358 | |
3359 | void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out, |
3360 | ProgramStateRef State, |
3361 | const char *NL, |
3362 | unsigned int Space, |
3363 | bool IsDot) const { |
3364 | ClassMembersTy Members = State->get<ClassMembers>(); |
3365 | |
3366 | Indent(Out, Space, IsDot) << "\"equivalence_classes\": " ; |
3367 | if (Members.isEmpty()) { |
3368 | Out << "null," << NL; |
3369 | return; |
3370 | } |
3371 | |
3372 | std::set<std::string> ; |
3373 | for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) |
3374 | MembersStr.insert(x: toString(State, Class: ClassToSymbolSet.first)); |
3375 | |
3376 | ++Space; |
3377 | Out << '[' << NL; |
3378 | bool FirstClass = true; |
3379 | for (const std::string &Str : MembersStr) { |
3380 | if (FirstClass) { |
3381 | FirstClass = false; |
3382 | } else { |
3383 | Out << ','; |
3384 | Out << NL; |
3385 | } |
3386 | Indent(Out, Space, IsDot); |
3387 | Out << Str; |
3388 | } |
3389 | Out << NL; |
3390 | |
3391 | --Space; |
3392 | Indent(Out, Space, IsDot) << "]," << NL; |
3393 | } |
3394 | |
3395 | void RangeConstraintManager::printDisequalities(raw_ostream &Out, |
3396 | ProgramStateRef State, |
3397 | const char *NL, |
3398 | unsigned int Space, |
3399 | bool IsDot) const { |
3400 | DisequalityMapTy Disequalities = State->get<DisequalityMap>(); |
3401 | |
3402 | Indent(Out, Space, IsDot) << "\"disequality_info\": " ; |
3403 | if (Disequalities.isEmpty()) { |
3404 | Out << "null," << NL; |
3405 | return; |
3406 | } |
3407 | |
3408 | // Transform the disequality info to an ordered map of |
3409 | // [string -> (ordered set of strings)] |
3410 | using EqClassesStrTy = std::set<std::string>; |
3411 | using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>; |
3412 | DisequalityInfoStrTy DisequalityInfoStr; |
3413 | for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) { |
3414 | EquivalenceClass Class = ClassToDisEqSet.first; |
3415 | ClassSet DisequalClasses = ClassToDisEqSet.second; |
3416 | EqClassesStrTy ; |
3417 | for (EquivalenceClass DisEqClass : DisequalClasses) |
3418 | MembersStr.insert(x: toString(State, Class: DisEqClass)); |
3419 | DisequalityInfoStr.insert(x: {toString(State, Class), MembersStr}); |
3420 | } |
3421 | |
3422 | ++Space; |
3423 | Out << '[' << NL; |
3424 | bool FirstClass = true; |
3425 | for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet : |
3426 | DisequalityInfoStr) { |
3427 | const std::string &Class = ClassToDisEqSet.first; |
3428 | if (FirstClass) { |
3429 | FirstClass = false; |
3430 | } else { |
3431 | Out << ','; |
3432 | Out << NL; |
3433 | } |
3434 | Indent(Out, Space, IsDot) << "{" << NL; |
3435 | unsigned int DisEqSpace = Space + 1; |
3436 | Indent(Out, Space: DisEqSpace, IsDot) << "\"class\": " ; |
3437 | Out << Class; |
3438 | const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second; |
3439 | if (!DisequalClasses.empty()) { |
3440 | Out << "," << NL; |
3441 | Indent(Out, Space: DisEqSpace, IsDot) << "\"disequal_to\": [" << NL; |
3442 | unsigned int DisEqClassSpace = DisEqSpace + 1; |
3443 | Indent(Out, Space: DisEqClassSpace, IsDot); |
3444 | bool FirstDisEqClass = true; |
3445 | for (const std::string &DisEqClass : DisequalClasses) { |
3446 | if (FirstDisEqClass) { |
3447 | FirstDisEqClass = false; |
3448 | } else { |
3449 | Out << ',' << NL; |
3450 | Indent(Out, Space: DisEqClassSpace, IsDot); |
3451 | } |
3452 | Out << DisEqClass; |
3453 | } |
3454 | Out << "]" << NL; |
3455 | } |
3456 | Indent(Out, Space, IsDot) << "}" ; |
3457 | } |
3458 | Out << NL; |
3459 | |
3460 | --Space; |
3461 | Indent(Out, Space, IsDot) << "]," << NL; |
3462 | } |
3463 | |