1//== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines RangeConstraintManager, a class that tracks simple
10// equality and inequality constraints on symbolic values of ProgramState.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/JsonSupport.h"
15#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
16#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
18#include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
19#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
20#include "llvm/ADT/FoldingSet.h"
21#include "llvm/ADT/ImmutableSet.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/StringExtras.h"
25#include "llvm/Support/Compiler.h"
26#include "llvm/Support/raw_ostream.h"
27#include <algorithm>
28#include <iterator>
29
30using namespace clang;
31using namespace ento;
32
33// This class can be extended with other tables which will help to reason
34// about ranges more precisely.
35class OperatorRelationsTable {
36 static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
37 BO_GE < BO_EQ && BO_EQ < BO_NE,
38 "This class relies on operators order. Rework it otherwise.");
39
40public:
41 enum TriStateKind {
42 False = 0,
43 True,
44 Unknown,
45 };
46
47private:
48 // CmpOpTable holds states which represent the corresponding range for
49 // branching an exploded graph. We can reason about the branch if there is
50 // a previously known fact of the existence of a comparison expression with
51 // operands used in the current expression.
52 // E.g. assuming (x < y) is true that means (x != y) is surely true.
53 // if (x previous_operation y) // < | != | >
54 // if (x operation y) // != | > | <
55 // tristate // True | Unknown | False
56 //
57 // CmpOpTable represents next:
58 // __|< |> |<=|>=|==|!=|UnknownX2|
59 // < |1 |0 |* |0 |0 |* |1 |
60 // > |0 |1 |0 |* |0 |* |1 |
61 // <=|1 |0 |1 |* |1 |* |0 |
62 // >=|0 |1 |* |1 |1 |* |0 |
63 // ==|0 |0 |* |* |1 |0 |1 |
64 // !=|1 |1 |* |* |0 |1 |0 |
65 //
66 // Columns stands for a previous operator.
67 // Rows stands for a current operator.
68 // Each row has exactly two `Unknown` cases.
69 // UnknownX2 means that both `Unknown` previous operators are met in code,
70 // and there is a special column for that, for example:
71 // if (x >= y)
72 // if (x != y)
73 // if (x <= y)
74 // False only
75 static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
76 const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
77 // < > <= >= == != UnknownX2
78 {True, False, Unknown, False, False, Unknown, True}, // <
79 {False, True, False, Unknown, False, Unknown, True}, // >
80 {True, False, True, Unknown, True, Unknown, False}, // <=
81 {False, True, Unknown, True, True, Unknown, False}, // >=
82 {False, False, Unknown, Unknown, True, False, True}, // ==
83 {True, True, Unknown, Unknown, False, True, False}, // !=
84 };
85
86 static size_t getIndexFromOp(BinaryOperatorKind OP) {
87 return static_cast<size_t>(OP - BO_LT);
88 }
89
90public:
91 constexpr size_t getCmpOpCount() const { return CmpOpCount; }
92
93 static BinaryOperatorKind getOpFromIndex(size_t Index) {
94 return static_cast<BinaryOperatorKind>(Index + BO_LT);
95 }
96
97 TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
98 BinaryOperatorKind QueriedOP) const {
99 return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
100 }
101
102 TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
103 return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
104 }
105};
106
107//===----------------------------------------------------------------------===//
108// RangeSet implementation
109//===----------------------------------------------------------------------===//
110
// Out-of-line definition of the factory's EmptySet member. The empty braces
// value-initialize the container, so it holds no ranges.
RangeSet::ContainerType RangeSet::Factory::EmptySet{};
112
113RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
114 ContainerType Result;
115 Result.reserve(LHS.size() + RHS.size());
116 std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
117 std::back_inserter(Result));
118 return makePersistent(std::move(Result));
119}
120
121RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
122 ContainerType Result;
123 Result.reserve(Original.size() + 1);
124
125 const_iterator Lower = llvm::lower_bound(Original, Element);
126 Result.insert(Result.end(), Original.begin(), Lower);
127 Result.push_back(Element);
128 Result.insert(Result.end(), Lower, Original.end());
129
130 return makePersistent(std::move(Result));
131}
132
133RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
134 return add(Original, Range(Point));
135}
136
137RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
138 ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
139 return makePersistent(std::move(Result));
140}
141
142RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
143 ContainerType Result;
144 Result.push_back(R);
145 Result = unite(*Original.Impl, Result);
146 return makePersistent(std::move(Result));
147}
148
149RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
150 return unite(Original, Range(ValueFactory.getValue(Point)));
151}
152
153RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
154 llvm::APSInt To) {
155 return unite(Original,
156 Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
157}
158
/// Exchange two iterator ranges: after the call (First, FirstEnd) denotes what
/// (Second, SecondEnd) denoted before, and vice versa.
///
/// This helper is only used inside this file, so it gets internal linkage.
template <typename T>
static void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
  std::swap(First, Second);
  std::swap(FirstEnd, SecondEnd);
}
164
165RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
166 const ContainerType &RHS) {
167 if (LHS.empty())
168 return RHS;
169 if (RHS.empty())
170 return LHS;
171
172 using llvm::APSInt;
173 using iterator = ContainerType::const_iterator;
174
175 iterator First = LHS.begin();
176 iterator FirstEnd = LHS.end();
177 iterator Second = RHS.begin();
178 iterator SecondEnd = RHS.end();
179 APSIntType Ty = APSIntType(First->From());
180 const APSInt Min = Ty.getMinValue();
181
182 // Handle a corner case first when both range sets start from MIN.
183 // This helps to avoid complicated conditions below. Specifically, this
184 // particular check for `MIN` is not needed in the loop below every time
185 // when we do `Second->From() - One` operation.
186 if (Min == First->From() && Min == Second->From()) {
187 if (First->To() > Second->To()) {
188 // [ First ]--->
189 // [ Second ]----->
190 // MIN^
191 // The Second range is entirely inside the First one.
192
193 // Check if Second is the last in its RangeSet.
194 if (++Second == SecondEnd)
195 // [ First ]--[ First + 1 ]--->
196 // [ Second ]--------------------->
197 // MIN^
198 // The Union is equal to First's RangeSet.
199 return LHS;
200 } else {
201 // case 1: [ First ]----->
202 // case 2: [ First ]--->
203 // [ Second ]--->
204 // MIN^
205 // The First range is entirely inside or equal to the Second one.
206
207 // Check if First is the last in its RangeSet.
208 if (++First == FirstEnd)
209 // [ First ]----------------------->
210 // [ Second ]--[ Second + 1 ]---->
211 // MIN^
212 // The Union is equal to Second's RangeSet.
213 return RHS;
214 }
215 }
216
217 const APSInt One = Ty.getValue(1);
218 ContainerType Result;
219
220 // This is called when there are no ranges left in one of the ranges.
221 // Append the rest of the ranges from another range set to the Result
222 // and return with that.
223 const auto AppendTheRest = [&Result](iterator I, iterator E) {
224 Result.append(I, E);
225 return Result;
226 };
227
228 while (true) {
229 // We want to keep the following invariant at all times:
230 // ---[ First ------>
231 // -----[ Second --->
232 if (First->From() > Second->From())
233 swapIterators(First, FirstEnd, Second, SecondEnd);
234
235 // The Union definitely starts with First->From().
236 // ----------[ First ------>
237 // ------------[ Second --->
238 // ----------[ Union ------>
239 // UnionStart^
240 const llvm::APSInt &UnionStart = First->From();
241
242 // Loop where the invariant holds.
243 while (true) {
244 // Skip all enclosed ranges.
245 // ---[ First ]--->
246 // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
247 while (First->To() >= Second->To()) {
248 // Check if Second is the last in its RangeSet.
249 if (++Second == SecondEnd) {
250 // Append the Union.
251 // ---[ Union ]--->
252 // -----[ Second ]----->
253 // --------[ First ]--->
254 // UnionEnd^
255 Result.emplace_back(UnionStart, First->To());
256 // ---[ Union ]----------------->
257 // --------------[ First + 1]--->
258 // Append all remaining ranges from the First's RangeSet.
259 return AppendTheRest(++First, FirstEnd);
260 }
261 }
262
263 // Check if First and Second are disjoint. It means that we find
264 // the end of the Union. Exit the loop and append the Union.
265 // ---[ First ]=------------->
266 // ------------=[ Second ]--->
267 // ----MinusOne^
268 if (First->To() < Second->From() - One)
269 break;
270
271 // First is entirely inside the Union. Go next.
272 // ---[ Union ----------->
273 // ---- [ First ]-------->
274 // -------[ Second ]----->
275 // Check if First is the last in its RangeSet.
276 if (++First == FirstEnd) {
277 // Append the Union.
278 // ---[ Union ]--->
279 // -----[ First ]------->
280 // --------[ Second ]--->
281 // UnionEnd^
282 Result.emplace_back(UnionStart, Second->To());
283 // ---[ Union ]------------------>
284 // --------------[ Second + 1]--->
285 // Append all remaining ranges from the Second's RangeSet.
286 return AppendTheRest(++Second, SecondEnd);
287 }
288
289 // We know that we are at one of the two cases:
290 // case 1: --[ First ]--------->
291 // case 2: ----[ First ]------->
292 // --------[ Second ]---------->
293 // In both cases First starts after Second->From().
294 // Make sure that the loop invariant holds.
295 swapIterators(First, FirstEnd, Second, SecondEnd);
296 }
297
298 // Here First and Second are disjoint.
299 // Append the Union.
300 // ---[ Union ]--------------->
301 // -----------------[ Second ]--->
302 // ------[ First ]--------------->
303 // UnionEnd^
304 Result.emplace_back(UnionStart, First->To());
305
306 // Check if First is the last in its RangeSet.
307 if (++First == FirstEnd)
308 // ---[ Union ]--------------->
309 // --------------[ Second ]--->
310 // Append all remaining ranges from the Second's RangeSet.
311 return AppendTheRest(Second, SecondEnd);
312 }
313
314 llvm_unreachable("Normally, we should not reach here");
315}
316
317RangeSet RangeSet::Factory::getRangeSet(Range From) {
318 ContainerType Result;
319 Result.push_back(From);
320 return makePersistent(std::move(Result));
321}
322
323RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
324 llvm::FoldingSetNodeID ID;
325 void *InsertPos;
326
327 From.Profile(ID);
328 ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
329
330 if (!Result) {
331 // It is cheaper to fully construct the resulting range on stack
332 // and move it to the freshly allocated buffer if we don't have
333 // a set like this already.
334 Result = construct(std::move(From));
335 Cache.InsertNode(Result, InsertPos);
336 }
337
338 return Result;
339}
340
341RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
342 void *Buffer = Arena.Allocate();
343 return new (Buffer) ContainerType(std::move(From));
344}
345
346const llvm::APSInt &RangeSet::getMinValue() const {
347 assert(!isEmpty());
348 return begin()->From();
349}
350
351const llvm::APSInt &RangeSet::getMaxValue() const {
352 assert(!isEmpty());
353 return std::prev(end())->To();
354}
355
356bool clang::ento::RangeSet::isUnsigned() const {
357 assert(!isEmpty());
358 return begin()->From().isUnsigned();
359}
360
361uint32_t clang::ento::RangeSet::getBitWidth() const {
362 assert(!isEmpty());
363 return begin()->From().getBitWidth();
364}
365
366APSIntType clang::ento::RangeSet::getAPSIntType() const {
367 assert(!isEmpty());
368 return APSIntType(begin()->From());
369}
370
371bool RangeSet::containsImpl(llvm::APSInt &Point) const {
372 if (isEmpty() || !pin(Point))
373 return false;
374
375 Range Dummy(Point);
376 const_iterator It = llvm::upper_bound(*this, Dummy);
377 if (It == begin())
378 return false;
379
380 return std::prev(It)->Includes(Point);
381}
382
383bool RangeSet::pin(llvm::APSInt &Point) const {
384 APSIntType Type(getMinValue());
385 if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
386 return false;
387
388 Type.apply(Point);
389 return true;
390}
391
/// Pin the range [\p Lower, \p Upper] to the type of this set's values.
///
/// Both bounds are rewritten in place: a bound that tests as within the type
/// is converted with APSIntType::apply, a bound outside of it is replaced by
/// the type's minimum or maximum as the case requires. A range with
/// Lower > Upper is treated as one that wraps around.
///
/// \returns false if the described range lies entirely outside the values
/// representable in the set's type, true otherwise.
bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
  // This function has nine cases, the cartesian product of range-testing
  // both the upper and lower bounds against the symbol's type.
  // Each case requires a different pinning operation.
  // The function returns false if the described range is entirely outside
  // the range of values for the associated symbol.
  APSIntType Type(getMinValue());
  APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
  APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);

  switch (LowerTest) {
  case APSIntType::RTR_Below:
    switch (UpperTest) {
    case APSIntType::RTR_Below:
      // The entire range is outside the symbol's set of possible values.
      // If this is a conventionally-ordered range, the state is infeasible.
      if (Lower <= Upper)
        return false;

      // However, if the range wraps around, it spans all possible values.
      Lower = Type.getMinValue();
      Upper = Type.getMaxValue();
      break;
    case APSIntType::RTR_Within:
      // The range starts below what's possible but ends within it. Pin.
      Lower = Type.getMinValue();
      Type.apply(Upper);
      break;
    case APSIntType::RTR_Above:
      // The range spans all possible values for the symbol. Pin.
      Lower = Type.getMinValue();
      Upper = Type.getMaxValue();
      break;
    }
    break;
  case APSIntType::RTR_Within:
    switch (UpperTest) {
    case APSIntType::RTR_Below:
      // The range wraps around, but all lower values are not possible.
      Type.apply(Lower);
      Upper = Type.getMaxValue();
      break;
    case APSIntType::RTR_Within:
      // The range may or may not wrap around, but both limits are valid.
      Type.apply(Lower);
      Type.apply(Upper);
      break;
    case APSIntType::RTR_Above:
      // The range starts within what's possible but ends above it. Pin.
      Type.apply(Lower);
      Upper = Type.getMaxValue();
      break;
    }
    break;
  case APSIntType::RTR_Above:
    switch (UpperTest) {
    case APSIntType::RTR_Below:
      // The range wraps but is outside the symbol's set of possible values.
      return false;
    case APSIntType::RTR_Within:
      // The range starts above what's possible but ends within it (wrap).
      Lower = Type.getMinValue();
      Type.apply(Upper);
      break;
    case APSIntType::RTR_Above:
      // The entire range is outside the symbol's set of possible values.
      // If this is a conventionally-ordered range, the state is infeasible.
      if (Lower <= Upper)
        return false;

      // However, if the range wraps around, it spans all possible values.
      Lower = Type.getMinValue();
      Upper = Type.getMaxValue();
      break;
    }
    break;
  }

  return true;
}
472
473RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
474 llvm::APSInt Upper) {
475 if (What.isEmpty() || !What.pin(Lower, Upper))
476 return getEmptySet();
477
478 ContainerType DummyContainer;
479
480 if (Lower <= Upper) {
481 // [Lower, Upper] is a regular range.
482 //
483 // Shortcut: check that there is even a possibility of the intersection
484 // by checking the two following situations:
485 //
486 // <---[ What ]---[------]------>
487 // Lower Upper
488 // -or-
489 // <----[------]----[ What ]---->
490 // Lower Upper
491 if (What.getMaxValue() < Lower || Upper < What.getMinValue())
492 return getEmptySet();
493
494 DummyContainer.push_back(
495 Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
496 } else {
497 // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
498 //
499 // Shortcut: check that there is even a possibility of the intersection
500 // by checking the following situation:
501 //
502 // <------]---[ What ]---[------>
503 // Upper Lower
504 if (What.getMaxValue() < Lower && Upper < What.getMinValue())
505 return getEmptySet();
506
507 DummyContainer.push_back(
508 Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
509 DummyContainer.push_back(
510 Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
511 }
512
513 return intersect(*What.Impl, DummyContainer);
514}
515
/// Intersect two range containers.
///
/// Both containers are walked front to back in lockstep; every overlap found
/// between a range of \p LHS and a range of \p RHS is appended to the result.
///
/// \returns the empty set when no overlap was found, otherwise the persistent
/// set holding the collected overlaps.
RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
                                      const RangeSet::ContainerType &RHS) {
  ContainerType Result;
  Result.reserve(std::max(LHS.size(), RHS.size()));

  const_iterator First = LHS.begin(), Second = RHS.begin(),
                 FirstEnd = LHS.end(), SecondEnd = RHS.end();

  // If we ran out of ranges in one set, but not in the other,
  // it means that those elements are definitely not in the
  // intersection.
  while (First != FirstEnd && Second != SecondEnd) {
    // We want to keep the following invariant at all times:
    //
    //    ----[ First ---------------------->
    //    --------[ Second ----------------->
    if (Second->From() < First->From())
      swapIterators(First, FirstEnd, Second, SecondEnd);

    // Loop where the invariant holds:
    do {
      // Check for the following situation:
      //
      //    ----[ First ]--------------------->
      //    ---------------[ Second ]--------->
      //
      // which means that...
      if (Second->From() > First->To()) {
        // ...First is not in the intersection.
        //
        // We should move on to the next range after First and break out of the
        // loop because the invariant might not be true.
        ++First;
        break;
      }

      // We have a guaranteed intersection at this point!
      // And this is the current situation:
      //
      //    ----[   First   ]----------------->
      //    -------[ Second ------------------>
      //
      // Additionally, it definitely starts with Second->From().
      const llvm::APSInt &IntersectionStart = Second->From();

      // It is important to know which of the two ranges' ends
      // is greater. That "longer" range might have some other
      // intersections, while the "shorter" range might not.
      if (Second->To() > First->To()) {
        // Here we make a decision to keep First as the "longer"
        // range.
        swapIterators(First, FirstEnd, Second, SecondEnd);
      }

      // At this point, we have the following situation:
      //
      //    ---- First      ]-------------------->
      //    ---- Second ]--[  Second+1 ---------->
      //
      // We don't know the relationship between First->From and
      // Second->From and we don't know whether Second+1 intersects
      // with First.
      //
      // However, we know that [IntersectionStart, Second->To] is
      // a part of the intersection...
      Result.push_back(Range(IntersectionStart, Second->To()));
      ++Second;
      // ...and that the invariant will hold for a valid Second+1
      // because First->From <= Second->To < (Second+1)->From.
    } while (Second != SecondEnd);
  }

  if (Result.empty())
    return getEmptySet();

  return makePersistent(std::move(Result));
}
593
594RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
595 // Shortcut: let's see if the intersection is even possible.
596 if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
597 RHS.getMaxValue() < LHS.getMinValue())
598 return getEmptySet();
599
600 return intersect(*LHS.Impl, *RHS.Impl);
601}
602
603RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
604 if (LHS.containsImpl(Point))
605 return getRangeSet(ValueFactory.getValue(Point));
606
607 return getEmptySet();
608}
609
/// Return the set obtained by negating every value of \p What.
///
/// Each range [From, To] becomes [-To, -From]. A first range that starts at
/// the type's MIN value gets special treatment below: the result then also
/// has a range starting at MIN. The collected ranges are sorted before the
/// set is made persistent.
RangeSet RangeSet::Factory::negate(RangeSet What) {
  if (What.isEmpty())
    return getEmptySet();

  const llvm::APSInt SampleValue = What.getMinValue();
  const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
  const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);

  ContainerType Result;
  // One extra slot is reserved when the set starts at MIN, because the first
  // range may then produce two ranges in the result.
  Result.reserve(What.size() + (SampleValue == MIN));

  // Handle a special case for MIN value.
  const_iterator It = What.begin();
  const_iterator End = What.end();

  const llvm::APSInt &From = It->From();
  const llvm::APSInt &To = It->To();

  if (From == MIN) {
    // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
    if (To == MAX) {
      return What;
    }

    const_iterator Last = std::prev(End);

    // Try to find and unite the following ranges:
    // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
    if (Last->To() == MAX) {
      // It means that in the original range we have ranges
      //   [MIN, A], ... , [B, MAX]
      // And the result should be [MIN, -B], ..., [-A, MAX]
      Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
      // We already negated Last, so we can skip it.
      End = Last;
    } else {
      // Add a separate range for the lowest value.
      Result.emplace_back(MIN, MIN);
    }

    // Skip adding the second range in case when [From, To] are [MIN, MIN].
    if (To != MIN) {
      Result.emplace_back(ValueFactory.getValue(-To), MAX);
    }

    // Skip the first range in the loop.
    ++It;
  }

  // Negate all other ranges.
  for (; It != End; ++It) {
    // Negate int values.
    const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
    const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());

    // Add a negated range.
    Result.emplace_back(NewFrom, NewTo);
  }

  llvm::sort(Result);
  return makePersistent(std::move(Result));
}
672
673// Convert range set to the given integral type using truncation and promotion.
674// This works similar to APSIntType::apply function but for the range set.
675RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
676 // Set is empty or NOOP (aka cast to the same type).
677 if (What.isEmpty() || What.getAPSIntType() == Ty)
678 return What;
679
680 const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
681 const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
682 const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
683
684 if (IsTruncation)
685 return makePersistent(truncateTo(What, Ty));
686
687 // Here we handle 2 cases:
688 // - IsConversion && !IsPromotion.
689 // In this case we handle changing a sign with same bitwidth: char -> uchar,
690 // uint -> int. Here we convert negatives to positives and positives which
691 // is out of range to negatives. We use convertTo function for that.
692 // - IsConversion && IsPromotion && !What.isUnsigned().
693 // In this case we handle changing a sign from signeds to unsigneds with
694 // higher bitwidth: char -> uint, int-> uint64. The point is that we also
695 // need convert negatives to positives and use convertTo function as well.
696 // For example, we don't need such a convertion when converting unsigned to
697 // signed with higher bitwidth, because all the values of unsigned is valid
698 // for the such signed.
699 if (IsConversion && (!IsPromotion || !What.isUnsigned()))
700 return makePersistent(convertTo(What, Ty));
701
702 assert(IsPromotion && "Only promotion operation from unsigneds left.");
703 return makePersistent(promoteTo(What, Ty));
704}
705
706RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
707 assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
708 return castTo(What, ValueFactory.getAPSIntType(T));
709}
710
/// Truncate every range of \p What to the type \p Ty and unite the results.
///
/// A range that covers at least as many values as \p Ty can hold turns the
/// whole result into [MIN, MAX] of \p Ty. A range whose truncated bounds come
/// out in reverse order is split into [MIN, To] and [From, MAX].
RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
                                                      APSIntType Ty) {
  using llvm::APInt;
  using llvm::APSInt;
  ContainerType Result;
  // Scratch container holding the range(s) produced by the current iteration.
  ContainerType Dummy;
  // CastRangeSize is an amount of all possible values of cast type.
  // Example: `char` has 256 values; `short` has 65536 values.
  // But in fact we use `amount of values` - 1, because
  // we can't keep `amount of values of UINT64` inside uint64_t.
  // E.g. 256 is an amount of all possible values of `char` and we can't keep
  // it inside `char`.
  // And it's OK, it's enough to do correct calculations.
  uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
  for (const Range &R : What) {
    // Get bounds of the given range.
    APSInt FromInt = R.From();
    APSInt ToInt = R.To();
    // CurrentRangeSize is an amount of all possible values of the current
    // range minus one.
    uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
    // This is an optimization for a specific case when this Range covers
    // the whole range of the target type.
    Dummy.clear();
    if (CurrentRangeSize >= CastRangeSize) {
      Dummy.emplace_back(ValueFactory.getMinValue(Ty),
                         ValueFactory.getMaxValue(Ty));
      Result = std::move(Dummy);
      break;
    }
    // Cast the bounds.
    Ty.apply(FromInt);
    Ty.apply(ToInt);
    const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
    const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
    if (FromInt > ToInt) {
      // The truncated bounds are in reverse order: split the range in two.
      Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
      Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
    } else
      Dummy.emplace_back(PersistentFrom, PersistentTo);
    // Every range retrieved after truncation potentially has garbage values.
    // So, we have to unite every next range with the previous ones.
    Result = unite(Result, Dummy);
  }

  return Result;
}
758
// Divide the conversion into two phases (presented as loops here).
// First phase(loop) works when casted values go in ascending order.
// E.g. char{1,3,5,127} -> uint{1,3,5,127}
// Interrupt the first phase and go to second one when casted values start
// go in descending order. That means that we crossed over the middle of
// the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
// For instance:
// 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
//    Here we put {1,3,5} to one array and {-128, -1} to another
// 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,2}
//    Here we put {128,129,255} to one array and {0,1,2} to another.
// After that we unite both arrays.
// NOTE: We don't just concatenate the arrays, because they may have
// adjacent ranges, e.g.:
// 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
//    unite -> uchar(0, 255)
// 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
//    unite -> char(-2, 1)
RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
                                                     APSIntType Ty) {
  using llvm::APInt;
  using llvm::APSInt;
  using Bounds = std::pair<const APSInt &, const APSInt &>;
  ContainerType AscendArray;
  ContainerType DescendArray;
  // Cast both bounds of a range to Ty and return the values handed back by
  // the value factory for them.
  auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
    // Get bounds of the given range.
    APSInt FromInt = R.From();
    APSInt ToInt = R.To();
    // Cast the bounds.
    Ty.apply(FromInt);
    Ty.apply(ToInt);
    return {VF.getValue(FromInt), VF.getValue(ToInt)};
  };
  // Phase 1. Fill the first array.
  APSInt LastConvertedInt = Ty.getMinValue();
  const auto *It = What.begin();
  const auto *E = What.end();
  while (It != E) {
    Bounds NewBounds = CastRange(*(It++));
    // If values stop going in ascending order, go to the second phase(loop).
    if (NewBounds.first < LastConvertedInt) {
      DescendArray.emplace_back(NewBounds.first, NewBounds.second);
      break;
    }
    // If the range contains a midpoint, then split the range.
    // E.g. char(-5, 5) -> uchar(251, 5)
    // Here we shall add a range (251, 255) to the first array and (0, 5) to the
    // second one.
    if (NewBounds.first > NewBounds.second) {
      DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
      AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
    } else
      // Values are going in ascending order.
      AscendArray.emplace_back(NewBounds.first, NewBounds.second);
    LastConvertedInt = NewBounds.first;
  }
  // Phase 2. Fill the second array.
  while (It != E) {
    Bounds NewBounds = CastRange(*(It++));
    DescendArray.emplace_back(NewBounds.first, NewBounds.second);
  }
  // Unite both arrays.
  return unite(AscendArray, DescendArray);
}
824
825/// Promotion from unsigneds to signeds/unsigneds left.
826RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
827 APSIntType Ty) {
828 ContainerType Result;
829 // We definitely know the size of the result set.
830 Result.reserve(What.size());
831
832 // Each unsigned value fits every larger type without any changes,
833 // whether the larger type is signed or unsigned. So just promote and push
834 // back each range one by one.
835 for (const Range &R : What) {
836 // Get bounds of the given range.
837 llvm::APSInt FromInt = R.From();
838 llvm::APSInt ToInt = R.To();
839 // Cast the bounds.
840 Ty.apply(FromInt);
841 Ty.apply(ToInt);
842 Result.emplace_back(ValueFactory.getValue(FromInt),
843 ValueFactory.getValue(ToInt));
844 }
845 return Result;
846}
847
848RangeSet RangeSet::Factory::deletePoint(RangeSet From,
849 const llvm::APSInt &Point) {
850 if (!From.contains(Point))
851 return From;
852
853 llvm::APSInt Upper = Point;
854 llvm::APSInt Lower = Point;
855
856 ++Upper;
857 --Lower;
858
859 // Notice that the lower bound is greater than the upper bound.
860 return intersect(From, Upper, Lower);
861}
862
863LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
864 OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
865}
866LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
867
868LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
869 OS << "{ ";
870 llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
871 OS << " }";
872}
873LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
874
// Set factory for SymbolSet, a set of SymbolRef.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)

namespace {
// Forward declaration: the registrations below name EquivalenceClass before
// the class itself is defined.
class EquivalenceClass;
} // end anonymous namespace

// ClassMap: symbol -> equivalence class.
REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
// ClassMembers: equivalence class -> set of symbols.
REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
// ConstraintRange: equivalence class -> range set.
REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)

// Set factory for ClassSet, a set of EquivalenceClass.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
// DisequalityMap: equivalence class -> set of classes. Presumably the classes
// known to be unequal to the key; confirm against the users of this map.
REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
887
888namespace {
889/// This class encapsulates a set of symbols equal to each other.
890///
891/// The main idea of the approach requiring such classes is in narrowing
892/// and sharing constraints between symbols within the class. Also we can
893/// conclude that there is no practical need in storing constraints for
894/// every member of the class separately.
895///
896/// Main terminology:
897///
/// * "Equivalence class" is an object of this class, which can be efficiently
///   compared to other classes. It represents the whole class without
///   storing the actual members in it. The members of the class, however,
///   can be retrieved from the state.
902///
903/// * "Class members" are the symbols corresponding to the class. This means
904/// that A == B for every member symbols A and B from the class. Members of
905/// each class are stored in the state.
906///
907/// * "Trivial class" is a class that has and ever had only one same symbol.
908///
909/// * "Merge operation" merges two classes into one. It is the main operation
910/// to produce non-trivial classes.
911/// If, at some point, we can assume that two symbols from two distinct
912/// classes are equal, we can merge these classes.
913class EquivalenceClass : public llvm::FoldingSetNode {
914public:
915 /// Find equivalence class for the given symbol in the given state.
916 [[nodiscard]] static inline EquivalenceClass find(ProgramStateRef State,
917 SymbolRef Sym);
918
919 /// Merge classes for the given symbols and return a new state.
920 [[nodiscard]] static inline ProgramStateRef merge(RangeSet::Factory &F,
921 ProgramStateRef State,
922 SymbolRef First,
923 SymbolRef Second);
924 // Merge this class with the given class and return a new state.
925 [[nodiscard]] inline ProgramStateRef
926 merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);
927
928 /// Return a set of class members for the given state.
929 [[nodiscard]] inline SymbolSet getClassMembers(ProgramStateRef State) const;
930
931 /// Return true if the current class is trivial in the given state.
932 /// A class is trivial if and only if there is not any member relations stored
933 /// to it in State/ClassMembers.
934 /// An equivalence class with one member might seem as it does not hold any
935 /// meaningful information, i.e. that is a tautology. However, during the
936 /// removal of dead symbols we do not remove classes with one member for
937 /// resource and performance reasons. Consequently, a class with one member is
938 /// not necessarily trivial. It could happen that we have a class with two
939 /// members and then during the removal of dead symbols we remove one of its
940 /// members. In this case, the class is still non-trivial (it still has the
941 /// mappings in ClassMembers), even though it has only one member.
942 [[nodiscard]] inline bool isTrivial(ProgramStateRef State) const;
943
944 /// Return true if the current class is trivial and its only member is dead.
945 [[nodiscard]] inline bool isTriviallyDead(ProgramStateRef State,
946 SymbolReaper &Reaper) const;
947
948 [[nodiscard]] static inline ProgramStateRef
949 markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
950 SymbolRef Second);
951 [[nodiscard]] static inline ProgramStateRef
952 markDisequal(RangeSet::Factory &F, ProgramStateRef State,
953 EquivalenceClass First, EquivalenceClass Second);
954 [[nodiscard]] inline ProgramStateRef
955 markDisequal(RangeSet::Factory &F, ProgramStateRef State,
956 EquivalenceClass Other) const;
957 [[nodiscard]] static inline ClassSet getDisequalClasses(ProgramStateRef State,
958 SymbolRef Sym);
959 [[nodiscard]] inline ClassSet getDisequalClasses(ProgramStateRef State) const;
960 [[nodiscard]] inline ClassSet
961 getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;
962
963 [[nodiscard]] static inline Optional<bool> areEqual(ProgramStateRef State,
964 EquivalenceClass First,
965 EquivalenceClass Second);
966 [[nodiscard]] static inline Optional<bool>
967 areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
968
969 /// Remove one member from the class.
970 [[nodiscard]] ProgramStateRef removeMember(ProgramStateRef State,
971 const SymbolRef Old);
972
973 /// Iterate over all symbols and try to simplify them.
974 [[nodiscard]] static inline ProgramStateRef simplify(SValBuilder &SVB,
975 RangeSet::Factory &F,
976 ProgramStateRef State,
977 EquivalenceClass Class);
978
979 void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
980 LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
981 dumpToStream(State, llvm::errs());
982 }
983
984 /// Check equivalence data for consistency.
985 [[nodiscard]] LLVM_ATTRIBUTE_UNUSED static bool
986 isClassDataConsistent(ProgramStateRef State);
987
988 [[nodiscard]] QualType getType() const {
989 return getRepresentativeSymbol()->getType();
990 }
991
992 EquivalenceClass() = delete;
993 EquivalenceClass(const EquivalenceClass &) = default;
994 EquivalenceClass &operator=(const EquivalenceClass &) = delete;
995 EquivalenceClass(EquivalenceClass &&) = default;
996 EquivalenceClass &operator=(EquivalenceClass &&) = delete;
997
998 bool operator==(const EquivalenceClass &Other) const {
999 return ID == Other.ID;
1000 }
1001 bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
1002 bool operator!=(const EquivalenceClass &Other) const {
1003 return !operator==(Other);
1004 }
1005
1006 static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
1007 ID.AddInteger(CID);
1008 }
1009
1010 void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }
1011
1012private:
1013 /* implicit */ EquivalenceClass(SymbolRef Sym)
1014 : ID(reinterpret_cast<uintptr_t>(Sym)) {}
1015
1016 /// This function is intended to be used ONLY within the class.
1017 /// The fact that ID is a pointer to a symbol is an implementation detail
1018 /// and should stay that way.
1019 /// In the current implementation, we use it to retrieve the only member
1020 /// of the trivial class.
1021 SymbolRef getRepresentativeSymbol() const {
1022 return reinterpret_cast<SymbolRef>(ID);
1023 }
1024 static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);
1025
1026 inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
1027 SymbolSet Members, EquivalenceClass Other,
1028 SymbolSet OtherMembers);
1029
1030 static inline bool
1031 addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
1032 RangeSet::Factory &F, ProgramStateRef State,
1033 EquivalenceClass First, EquivalenceClass Second);
1034
1035 /// This is a unique identifier of the class.
1036 uintptr_t ID;
1037};
1038
1039//===----------------------------------------------------------------------===//
1040// Constraint functions
1041//===----------------------------------------------------------------------===//
1042
1043[[nodiscard]] LLVM_ATTRIBUTE_UNUSED bool
1044areFeasible(ConstraintRangeTy Constraints) {
1045 return llvm::none_of(
1046 Constraints,
1047 [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
1048 return ClassConstraint.second.isEmpty();
1049 });
1050}
1051
1052[[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
1053 EquivalenceClass Class) {
1054 return State->get<ConstraintRange>(Class);
1055}
1056
1057[[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
1058 SymbolRef Sym) {
1059 return getConstraint(State, EquivalenceClass::find(State, Sym));
1060}
1061
1062[[nodiscard]] ProgramStateRef setConstraint(ProgramStateRef State,
1063 EquivalenceClass Class,
1064 RangeSet Constraint) {
1065 return State->set<ConstraintRange>(Class, Constraint);
1066}
1067
1068[[nodiscard]] ProgramStateRef setConstraints(ProgramStateRef State,
1069 ConstraintRangeTy Constraints) {
1070 return State->set<ConstraintRange>(Constraints);
1071}
1072
1073//===----------------------------------------------------------------------===//
//                         Equality/disequality abstraction
1075//===----------------------------------------------------------------------===//
1076
1077/// A small helper function for detecting symbolic (dis)equality.
1078///
1079/// Equality check can have different forms (like a == b or a - b) and this
1080/// class encapsulates those away if the only thing the user wants to check -
1081/// whether it's equality/diseqiality or not.
1082///
1083/// \returns true if assuming this Sym to be true means equality of operands
1084/// false if it means disequality of operands
1085/// None otherwise
1086Optional<bool> meansEquality(const SymSymExpr *Sym) {
1087 switch (Sym->getOpcode()) {
1088 case BO_Sub:
1089 // This case is: A - B != 0 -> disequality check.
1090 return false;
1091 case BO_EQ:
1092 // This case is: A == B != 0 -> equality check.
1093 return true;
1094 case BO_NE:
1095 // This case is: A != B != 0 -> diseqiality check.
1096 return false;
1097 default:
1098 return llvm::None;
1099 }
1100}
1101
1102//===----------------------------------------------------------------------===//
1103// Intersection functions
1104//===----------------------------------------------------------------------===//
1105
/// Forward declaration of the intersect overload whose second argument is
/// tested for presence (`if (Second)`) in its definition further below. It is
/// declared this early so that intersect overloads defined before that
/// definition can already resolve calls to it.
template <class SecondTy, class... RestTy>
[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
                                        SecondTy Second, RestTy... Tail);
1109
1110template <class... RangeTy> struct IntersectionTraits;
1111
1112template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
1113 // Found RangeSet, no need to check any further
1114 using Type = RangeSet;
1115};
1116
1117template <> struct IntersectionTraits<> {
1118 // We ran out of types, and we didn't find any RangeSet, so the result should
1119 // be optional.
1120 using Type = Optional<RangeSet>;
1121};
1122
1123template <class OptionalOrPointer, class... TailTy>
1124struct IntersectionTraits<OptionalOrPointer, TailTy...> {
1125 // If current type is Optional or a raw pointer, we should keep looking.
1126 using Type = typename IntersectionTraits<TailTy...>::Type;
1127};
1128
1129template <class EndTy>
1130[[nodiscard]] inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
1131 // If the list contains only RangeSet or Optional<RangeSet>, simply return
1132 // that range set.
1133 return End;
1134}
1135
1136[[nodiscard]] LLVM_ATTRIBUTE_UNUSED inline Optional<RangeSet>
1137intersect(RangeSet::Factory &F, const RangeSet *End) {
1138 // This is an extraneous conversion from a raw pointer into Optional<RangeSet>
1139 if (End) {
1140 return *End;
1141 }
1142 return llvm::None;
1143}
1144
1145template <class... RestTy>
1146[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1147 RangeSet Second, RestTy... Tail) {
1148 // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
1149 // of the function and can be sure that the result is RangeSet.
1150 return intersect(F, F.intersect(Head, Second), Tail...);
1151}
1152
1153template <class SecondTy, class... RestTy>
1154[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1155 SecondTy Second, RestTy... Tail) {
1156 if (Second) {
1157 // Here we call the <RangeSet,RangeSet,...> version of the function...
1158 return intersect(F, Head, *Second, Tail...);
1159 }
1160 // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
1161 // means that the result is definitely RangeSet.
1162 return intersect(F, Head, Tail...);
1163}
1164
1165/// Main generic intersect function.
1166/// It intersects all of the given range sets. If some of the given arguments
1167/// don't hold a range set (nullptr or llvm::None), the function will skip them.
1168///
1169/// Available representations for the arguments are:
1170/// * RangeSet
1171/// * Optional<RangeSet>
1172/// * RangeSet *
1173/// Pointer to a RangeSet is automatically assumed to be nullable and will get
1174/// checked as well as the optional version. If this behaviour is undesired,
1175/// please dereference the pointer in the call.
1176///
1177/// Return type depends on the arguments' types. If we can be sure in compile
1178/// time that there will be a range set as a result, the returning type is
1179/// simply RangeSet, in other cases we have to back off to Optional<RangeSet>.
1180///
1181/// Please, prefer optional range sets to raw pointers. If the last argument is
1182/// a raw pointer and all previous arguments are None, it will cost one
1183/// additional check to convert RangeSet * into Optional<RangeSet>.
1184template <class HeadTy, class SecondTy, class... RestTy>
1185[[nodiscard]] inline
1186 typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
1187 intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
1188 RestTy... Tail) {
1189 if (Head) {
1190 return intersect(F, *Head, Second, Tail...);
1191 }
1192 return intersect(F, Second, Tail...);
1193}
1194
1195//===----------------------------------------------------------------------===//
1196// Symbolic reasoning logic
1197//===----------------------------------------------------------------------===//
1198
1199/// A little component aggregating all of the reasoning we have about
1200/// the ranges of symbolic expressions.
1201///
1202/// Even when we don't know the exact values of the operands, we still
1203/// can get a pretty good estimate of the result's range.
1204class SymbolicRangeInferrer
1205 : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
1206public:
1207 template <class SourceType>
1208 static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
1209 SourceType Origin) {
1210 SymbolicRangeInferrer Inferrer(F, State);
1211 return Inferrer.infer(Origin);
1212 }
1213
1214 RangeSet VisitSymExpr(SymbolRef Sym) {
1215 if (Optional<RangeSet> RS = getRangeForNegatedSym(Sym))
1216 return *RS;
1217 // If we've reached this line, the actual type of the symbolic
1218 // expression is not supported for advanced inference.
1219 // In this case, we simply backoff to the default "let's simply
1220 // infer the range from the expression's type".
1221 return infer(Sym->getType());
1222 }
1223
1224 RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
1225 if (Optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
1226 return *RS;
1227 return infer(USE->getType());
1228 }
1229
1230 RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
1231 return VisitBinaryOperator(Sym);
1232 }
1233
1234 RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
1235 return VisitBinaryOperator(Sym);
1236 }
1237
1238 RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
1239 return intersect(
1240 RangeFactory,
1241 // If Sym is a difference of symbols A - B, then maybe we have range
1242 // set stored for B - A.
1243 //
1244 // If we have range set stored for both A - B and B - A then
1245 // calculate the effective range set by intersecting the range set
1246 // for A - B and the negated range set of B - A.
1247 getRangeForNegatedSymSym(SSE),
1248 // If Sym is a comparison expression (except <=>),
1249 // find any other comparisons with the same operands.
1250 // See function description.
1251 getRangeForComparisonSymbol(SSE),
1252 // If Sym is (dis)equality, we might have some information
1253 // on that in our equality classes data structure.
1254 getRangeForEqualities(SSE),
1255 // And we should always check what we can get from the operands.
1256 VisitBinaryOperator(SSE));
1257 }
1258
1259private:
1260 SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
1261 : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
1262
1263 /// Infer range information from the given integer constant.
1264 ///
1265 /// It's not a real "inference", but is here for operating with
1266 /// sub-expressions in a more polymorphic manner.
1267 RangeSet inferAs(const llvm::APSInt &Val, QualType) {
1268 return {RangeFactory, Val};
1269 }
1270
1271 /// Infer range information from symbol in the context of the given type.
1272 RangeSet inferAs(SymbolRef Sym, QualType DestType) {
1273 QualType ActualType = Sym->getType();
1274 // Check that we can reason about the symbol at all.
1275 if (ActualType->isIntegralOrEnumerationType() ||
1276 Loc::isLocType(ActualType)) {
1277 return infer(Sym);
1278 }
1279 // Otherwise, let's simply infer from the destination type.
1280 // We couldn't figure out nothing else about that expression.
1281 return infer(DestType);
1282 }
1283
1284 RangeSet infer(SymbolRef Sym) {
1285 return intersect(RangeFactory,
1286 // Of course, we should take the constraint directly
1287 // associated with this symbol into consideration.
1288 getConstraint(State, Sym),
1289 // Apart from the Sym itself, we can infer quite a lot if
1290 // we look into subexpressions of Sym.
1291 Visit(Sym));
1292 }
1293
1294 RangeSet infer(EquivalenceClass Class) {
1295 if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
1296 return *AssociatedConstraint;
1297
1298 return infer(Class.getType());
1299 }
1300
1301 /// Infer range information solely from the type.
1302 RangeSet infer(QualType T) {
1303 // Lazily generate a new RangeSet representing all possible values for the
1304 // given symbol type.
1305 RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
1306 ValueFactory.getMaxValue(T));
1307
1308 // References are known to be non-zero.
1309 if (T->isReferenceType())
1310 return assumeNonZero(Result, T);
1311
1312 return Result;
1313 }
1314
1315 template <class BinarySymExprTy>
1316 RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
1317 // TODO #1: VisitBinaryOperator implementation might not make a good
1318 // use of the inferred ranges. In this case, we might be calculating
1319 // everything for nothing. This being said, we should introduce some
1320 // sort of laziness mechanism here.
1321 //
1322 // TODO #2: We didn't go into the nested expressions before, so it
1323 // might cause us spending much more time doing the inference.
1324 // This can be a problem for deeply nested expressions that are
1325 // involved in conditions and get tested continuously. We definitely
1326 // need to address this issue and introduce some sort of caching
1327 // in here.
1328 QualType ResultType = Sym->getType();
1329 return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
1330 Sym->getOpcode(),
1331 inferAs(Sym->getRHS(), ResultType), ResultType);
1332 }
1333
1334 RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
1335 RangeSet RHS, QualType T) {
1336 switch (Op) {
1337 case BO_Or:
1338 return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
1339 case BO_And:
1340 return VisitBinaryOperator<BO_And>(LHS, RHS, T);
1341 case BO_Rem:
1342 return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
1343 default:
1344 return infer(T);
1345 }
1346 }
1347
1348 //===----------------------------------------------------------------------===//
1349 // Ranges and operators
1350 //===----------------------------------------------------------------------===//
1351
1352 /// Return a rough approximation of the given range set.
1353 ///
1354 /// For the range set:
1355 /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
1356 /// it will return the range [x_0, y_N].
1357 static Range fillGaps(RangeSet Origin) {
1358 assert(!Origin.isEmpty());
1359 return {Origin.getMinValue(), Origin.getMaxValue()};
1360 }
1361
1362 /// Try to convert given range into the given type.
1363 ///
1364 /// It will return llvm::None only when the trivial conversion is possible.
1365 llvm::Optional<Range> convert(const Range &Origin, APSIntType To) {
1366 if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
1367 To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
1368 return llvm::None;
1369 }
1370 return Range(ValueFactory.Convert(To, Origin.From()),
1371 ValueFactory.Convert(To, Origin.To()));
1372 }
1373
1374 template <BinaryOperator::Opcode Op>
1375 RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
1376 // We should propagate information about unfeasbility of one of the
1377 // operands to the resulting range.
1378 if (LHS.isEmpty() || RHS.isEmpty()) {
1379 return RangeFactory.getEmptySet();
1380 }
1381
1382 Range CoarseLHS = fillGaps(LHS);
1383 Range CoarseRHS = fillGaps(RHS);
1384
1385 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1386
1387 // We need to convert ranges to the resulting type, so we can compare values
1388 // and combine them in a meaningful (in terms of the given operation) way.
1389 auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
1390 auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
1391
1392 // It is hard to reason about ranges when conversion changes
1393 // borders of the ranges.
1394 if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
1395 return infer(T);
1396 }
1397
1398 return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
1399 }
1400
1401 template <BinaryOperator::Opcode Op>
1402 RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
1403 return infer(T);
1404 }
1405
1406 /// Return a symmetrical range for the given range and type.
1407 ///
1408 /// If T is signed, return the smallest range [-x..x] that covers the original
1409 /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
1410 /// exist due to original range covering min(T)).
1411 ///
1412 /// If T is unsigned, return the smallest range [0..x] that covers the
1413 /// original range.
1414 Range getSymmetricalRange(Range Origin, QualType T) {
1415 APSIntType RangeType = ValueFactory.getAPSIntType(T);
1416
1417 if (RangeType.isUnsigned()) {
1418 return Range(ValueFactory.getMinValue(RangeType), Origin.To());
1419 }
1420
1421 if (Origin.From().isMinSignedValue()) {
1422 // If mini is a minimal signed value, absolute value of it is greater
1423 // than the maximal signed value. In order to avoid these
1424 // complications, we simply return the whole range.
1425 return {ValueFactory.getMinValue(RangeType),
1426 ValueFactory.getMaxValue(RangeType)};
1427 }
1428
1429 // At this point, we are sure that the type is signed and we can safely
1430 // use unary - operator.
1431 //
1432 // While calculating absolute maximum, we can use the following formula
1433 // because of these reasons:
1434 // * If From >= 0 then To >= From and To >= -From.
1435 // AbsMax == To == max(To, -From)
1436 // * If To <= 0 then -From >= -To and -From >= From.
1437 // AbsMax == -From == max(-From, To)
1438 // * Otherwise, From <= 0, To >= 0, and
1439 // AbsMax == max(abs(From), abs(To))
1440 llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
1441
1442 // Intersection is guaranteed to be non-empty.
1443 return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
1444 }
1445
1446 /// Return a range set subtracting zero from \p Domain.
1447 RangeSet assumeNonZero(RangeSet Domain, QualType T) {
1448 APSIntType IntType = ValueFactory.getAPSIntType(T);
1449 return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
1450 }
1451
1452 template <typename ProduceNegatedSymFunc>
1453 Optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
1454 QualType T) {
1455 // Do not negate if the type cannot be meaningfully negated.
1456 if (!T->isUnsignedIntegerOrEnumerationType() &&
1457 !T->isSignedIntegerOrEnumerationType())
1458 return llvm::None;
1459
1460 if (SymbolRef NegatedSym = F())
1461 if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
1462 return RangeFactory.negate(*NegatedRange);
1463
1464 return llvm::None;
1465 }
1466
1467 Optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
1468 // Just get the operand when we negate a symbol that is already negated.
1469 // -(-a) == a
1470 return getRangeForNegatedExpr(
1471 [USE]() -> SymbolRef {
1472 if (USE->getOpcode() == UO_Minus)
1473 return USE->getOperand();
1474 return nullptr;
1475 },
1476 USE->getType());
1477 }
1478
1479 Optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
1480 return getRangeForNegatedExpr(
1481 [SSE, State = this->State]() -> SymbolRef {
1482 if (SSE->getOpcode() == BO_Sub)
1483 return State->getSymbolManager().getSymSymExpr(
1484 SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
1485 return nullptr;
1486 },
1487 SSE->getType());
1488 }
1489
1490 Optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
1491 return getRangeForNegatedExpr(
1492 [Sym, State = this->State]() {
1493 return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
1494 Sym->getType());
1495 },
1496 Sym->getType());
1497 }
1498
// Returns ranges only for binary comparison operators (except <=>)
// when left and right operands are symbolic values.
// Finds any other comparisons with the same operands.
// Then does logical calculations and refuses impossible branches.
// E.g. (x < y) and (x > y) at the same time are impossible.
// E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
// E.g. (x == y) and (y == x) are just reversed but the same.
// It covers all possible combinations (see CmpOpTable description).
// Note that `x` and `y` can also stand for subexpressions,
// not only for actual symbols.
//
// Returns llvm::None when SSE is not a supported comparison, or when no
// stored comparison of the same operands decides the outcome.
Optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
  const BinaryOperatorKind CurrentOP = SSE->getOpcode();

  // We currently do not support <=> (C++20).
  if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
    return llvm::None;

  static const OperatorRelationsTable CmpOpTable{};

  const SymExpr *LHS = SSE->getLHS();
  const SymExpr *RHS = SSE->getRHS();
  QualType T = SSE->getType();

  SymbolManager &SymMgr = State->getSymbolManager();

  // We use this variable to store the last queried operator (`QueriedOP`)
  // for which the `getCmpOpState` returned with `Unknown`. If there are two
  // different OPs that returned `Unknown` then we have to query the special
  // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
  // never returns `Unknown`, so `CurrentOP` is a good initial value.
  BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;

  // Loop goes through all of the columns except the last one ('UnknownX2').
  // We treat `UnknownX2` column separately at the end of the loop body.
  for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {

    // Let's find an expression e.g. (x < y).
    BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
    const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
    const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);

    // If ranges were not previously found,
    // try to find a reversed expression (y > x).
    if (!QueriedRangeSet) {
      const BinaryOperatorKind ROP =
          BinaryOperator::reverseComparisonOp(QueriedOP);
      SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
      QueriedRangeSet = getConstraint(State, SymSym);
    }

    // Nothing usable is known about this comparison; try the next operator.
    if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
      continue;

    // The queried comparison counts as false only when its range is exactly
    // the single value 0; any other non-empty range is handled as the true
    // branch.
    const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
    const bool isInFalseBranch =
        ConcreteValue ? (*ConcreteValue == 0) : false;

    // If it is a false branch, we shall be guided by opposite operator,
    // because the table is made assuming we are in the true branch.
    // E.g. when (x <= y) is false, then (x > y) is true.
    if (isInFalseBranch)
      QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);

    OperatorRelationsTable::TriStateKind BranchState =
        CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);

    if (BranchState == OperatorRelationsTable::Unknown) {
      if (LastQueriedOpToUnknown != CurrentOP &&
          LastQueriedOpToUnknown != QueriedOP) {
        // If we got the Unknown state for both different operators.
        // if (x <= y)    // assume true
        //   if (x != y)  // assume true
        //     if (x < y) // would be also true
        // Get a state from `UnknownX2` column.
        BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
      } else {
        // First operator that yields Unknown: remember it and keep looking
        // for a second, different one.
        LastQueriedOpToUnknown = QueriedOP;
        continue;
      }
    }

    // The first decisive relation determines the result.
    return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
                                                         : getFalseRange(T);
  }

  return llvm::None;
}
1586
1587 Optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
1588 Optional<bool> Equality = meansEquality(Sym);
1589
1590 if (!Equality)
1591 return llvm::None;
1592
1593 if (Optional<bool> AreEqual =
1594 EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
1595 // Here we cover two cases at once:
1596 // * if Sym is equality and its operands are known to be equal -> true
1597 // * if Sym is disequality and its operands are disequal -> true
1598 if (*AreEqual == *Equality) {
1599 return getTrueRange(Sym->getType());
1600 }
1601 // Opposite combinations result in false.
1602 return getFalseRange(Sym->getType());
1603 }
1604
1605 return llvm::None;
1606 }
1607
1608 RangeSet getTrueRange(QualType T) {
1609 RangeSet TypeRange = infer(T);
1610 return assumeNonZero(TypeRange, T);
1611 }
1612
1613 RangeSet getFalseRange(QualType T) {
1614 const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
1615 return RangeSet(RangeFactory, Zero);
1616 }
1617
1618 BasicValueFactory &ValueFactory;
1619 RangeSet::Factory &RangeFactory;
1620 ProgramStateRef State;
1621};
1622
1623//===----------------------------------------------------------------------===//
1624// Range-based reasoning about symbolic operations
1625//===----------------------------------------------------------------------===//
1626
1627template <>
1628RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
1629 QualType T) {
1630 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1631 llvm::APSInt Zero = ResultType.getZeroValue();
1632
1633 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1634 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1635
1636 bool IsLHSNegative = LHS.To() < Zero;
1637 bool IsRHSNegative = RHS.To() < Zero;
1638
1639 // Check if both ranges have the same sign.
1640 if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1641 (IsLHSNegative && IsRHSNegative)) {
1642 // The result is definitely greater or equal than any of the operands.
1643 const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
1644
1645 // We estimate maximal value for positives as the maximal value for the
1646 // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
1647 //
1648 // TODO: We basically, limit the resulting range from below, but don't do
1649 // anything with the upper bound.
1650 //
1651 // For positive operands, it can be done as follows: for the upper
1652 // bound of LHS and RHS we calculate the most significant bit set.
1653 // Let's call it the N-th bit. Then we can estimate the maximal
1654 // number to be 2^(N+1)-1, i.e. the number with all the bits up to
1655 // the N-th bit set.
1656 const llvm::APSInt &Max = IsLHSNegative
1657 ? ValueFactory.getValue(--Zero)
1658 : ValueFactory.getMaxValue(ResultType);
1659
1660 return {RangeFactory, ValueFactory.getValue(Min), Max};
1661 }
1662
1663 // Otherwise, let's check if at least one of the operands is negative.
1664 if (IsLHSNegative || IsRHSNegative) {
1665 // This means that the result is definitely negative as well.
1666 return {RangeFactory, ValueFactory.getMinValue(ResultType),
1667 ValueFactory.getValue(--Zero)};
1668 }
1669
1670 RangeSet DefaultRange = infer(T);
1671
1672 // It is pretty hard to reason about operands with different signs
1673 // (and especially with possibly different signs). We simply check if it
1674 // can be zero. In order to conclude that the result could not be zero,
1675 // at least one of the operands should be definitely not zero itself.
1676 if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
1677 return assumeNonZero(DefaultRange, T);
1678 }
1679
1680 // Nothing much else to do here.
1681 return DefaultRange;
1682}
1683
1684template <>
1685RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
1686 Range RHS,
1687 QualType T) {
1688 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1689 llvm::APSInt Zero = ResultType.getZeroValue();
1690
1691 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1692 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1693
1694 bool IsLHSNegative = LHS.To() < Zero;
1695 bool IsRHSNegative = RHS.To() < Zero;
1696
1697 // Check if both ranges have the same sign.
1698 if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1699 (IsLHSNegative && IsRHSNegative)) {
1700 // The result is definitely less or equal than any of the operands.
1701 const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
1702
1703 // We conservatively estimate lower bound to be the smallest positive
1704 // or negative value corresponding to the sign of the operands.
1705 const llvm::APSInt &Min = IsLHSNegative
1706 ? ValueFactory.getMinValue(ResultType)
1707 : ValueFactory.getValue(Zero);
1708
1709 return {RangeFactory, Min, Max};
1710 }
1711
1712 // Otherwise, let's check if at least one of the operands is positive.
1713 if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
1714 // This makes result definitely positive.
1715 //
1716 // We can also reason about a maximal value by finding the maximal
1717 // value of the positive operand.
1718 const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
1719
1720 // The minimal value on the other hand is much harder to reason about.
1721 // The only thing we know for sure is that the result is positive.
1722 return {RangeFactory, ValueFactory.getValue(Zero),
1723 ValueFactory.getValue(Max)};
1724 }
1725
1726 // Nothing much else to do here.
1727 return infer(T);
1728}
1729
1730template <>
1731RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
1732 Range RHS,
1733 QualType T) {
1734 llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
1735
1736 Range ConservativeRange = getSymmetricalRange(RHS, T);
1737
1738 llvm::APSInt Max = ConservativeRange.To();
1739 llvm::APSInt Min = ConservativeRange.From();
1740
1741 if (Max == Zero) {
1742 // It's an undefined behaviour to divide by 0 and it seems like we know
1743 // for sure that RHS is 0. Let's say that the resulting range is
1744 // simply infeasible for that matter.
1745 return RangeFactory.getEmptySet();
1746 }
1747
1748 // At this point, our conservative range is closed. The result, however,
1749 // couldn't be greater than the RHS' maximal absolute value. Because of
1750 // this reason, we turn the range into open (or half-open in case of
1751 // unsigned integers).
1752 //
1753 // While we operate on integer values, an open interval (a, b) can be easily
1754 // represented by the closed interval [a + 1, b - 1]. And this is exactly
1755 // what we do next.
1756 //
1757 // If we are dealing with unsigned case, we shouldn't move the lower bound.
1758 if (Min.isSigned()) {
1759 ++Min;
1760 }
1761 --Max;
1762
1763 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1764 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1765
1766 // Remainder operator results with negative operands is implementation
1767 // defined. Positive cases are much easier to reason about though.
1768 if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
1769 // If maximal value of LHS is less than maximal value of RHS,
1770 // the result won't get greater than LHS.To().
1771 Max = std::min(LHS.To(), Max);
1772 // We want to check if it is a situation similar to the following:
1773 //
1774 // <------------|---[ LHS ]--------[ RHS ]----->
1775 // -INF 0 +INF
1776 //
1777 // In this situation, we can conclude that (LHS / RHS) == 0 and
1778 // (LHS % RHS) == LHS.
1779 Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
1780 }
1781
1782 // Nevertheless, the symmetrical range for RHS is a conservative estimate
1783 // for any sign of either LHS, or RHS.
1784 return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
1785}
1786
1787//===----------------------------------------------------------------------===//
1788// Constraint manager implementation details
1789//===----------------------------------------------------------------------===//
1790
1791class RangeConstraintManager : public RangedConstraintManager {
1792public:
1793 RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
1794 : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
1795
1796 //===------------------------------------------------------------------===//
1797 // Implementation for interface from ConstraintManager.
1798 //===------------------------------------------------------------------===//
1799
1800 bool haveEqualConstraints(ProgramStateRef S1,
1801 ProgramStateRef S2) const override {
1802 // NOTE: ClassMembers are as simple as back pointers for ClassMap,
1803 // so comparing constraint ranges and class maps should be
1804 // sufficient.
1805 return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
1806 S1->get<ClassMap>() == S2->get<ClassMap>();
1807 }
1808
1809 bool canReasonAbout(SVal X) const override;
1810
1811 ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
1812
1813 const llvm::APSInt *getSymVal(ProgramStateRef State,
1814 SymbolRef Sym) const override;
1815
1816 ProgramStateRef removeDeadBindings(ProgramStateRef State,
1817 SymbolReaper &SymReaper) override;
1818
1819 void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
1820 unsigned int Space = 0, bool IsDot = false) const override;
1821 void printValue(raw_ostream &Out, ProgramStateRef State,
1822 SymbolRef Sym) override;
1823 void printConstraints(raw_ostream &Out, ProgramStateRef State,
1824 const char *NL = "\n", unsigned int Space = 0,
1825 bool IsDot = false) const;
1826 void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
1827 const char *NL = "\n", unsigned int Space = 0,
1828 bool IsDot = false) const;
1829 void printDisequalities(raw_ostream &Out, ProgramStateRef State,
1830 const char *NL = "\n", unsigned int Space = 0,
1831 bool IsDot = false) const;
1832
1833 //===------------------------------------------------------------------===//
1834 // Implementation for interface from RangedConstraintManager.
1835 //===------------------------------------------------------------------===//
1836
1837 ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
1838 const llvm::APSInt &V,
1839 const llvm::APSInt &Adjustment) override;
1840
1841 ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
1842 const llvm::APSInt &V,
1843 const llvm::APSInt &Adjustment) override;
1844
1845 ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
1846 const llvm::APSInt &V,
1847 const llvm::APSInt &Adjustment) override;
1848
1849 ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
1850 const llvm::APSInt &V,
1851 const llvm::APSInt &Adjustment) override;
1852
1853 ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
1854 const llvm::APSInt &V,
1855 const llvm::APSInt &Adjustment) override;
1856
1857 ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
1858 const llvm::APSInt &V,
1859 const llvm::APSInt &Adjustment) override;
1860
1861 ProgramStateRef assumeSymWithinInclusiveRange(
1862 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1863 const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1864
1865 ProgramStateRef assumeSymOutsideInclusiveRange(
1866 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1867 const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1868
1869private:
1870 RangeSet::Factory F;
1871
1872 RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
1873 RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
1874 ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
1875 RangeSet Range);
1876 ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
1877 RangeSet Range);
1878
1879 RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
1880 const llvm::APSInt &Int,
1881 const llvm::APSInt &Adjustment);
1882 RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
1883 const llvm::APSInt &Int,
1884 const llvm::APSInt &Adjustment);
1885 RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
1886 const llvm::APSInt &Int,
1887 const llvm::APSInt &Adjustment);
1888 RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
1889 const llvm::APSInt &Int,
1890 const llvm::APSInt &Adjustment);
1891 RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
1892 const llvm::APSInt &Int,
1893 const llvm::APSInt &Adjustment);
1894};
1895
1896//===----------------------------------------------------------------------===//
1897// Constraint assignment logic
1898//===----------------------------------------------------------------------===//
1899
1900/// ConstraintAssignorBase is a small utility class that unifies visitor
1901/// for ranges with a visitor for constraints (rangeset/range/constant).
1902///
1903/// It is designed to have one derived class, but generally it can have more.
1904/// Derived class can control which types we handle by defining methods of the
1905/// following form:
1906///
1907/// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
1908/// CONSTRAINT Constraint);
1909///
1910/// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
1911/// CONSTRAINT is the type of constraint (RangeSet/Range/Const)
1912/// return value signifies whether we should try other handle methods
1913/// (i.e. false would mean to stop right after calling this method)
1914template <class Derived> class ConstraintAssignorBase {
1915public:
1916 using Const = const llvm::APSInt &;
1917
1918#define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
1919
1920#define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \
1921 if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \
1922 return false
1923
1924 void assign(SymbolRef Sym, RangeSet Constraint) {
1925 assignImpl(Sym, Constraint);
1926 }
1927
1928 bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
1929 switch (Sym->getKind()) {
1930#define SYMBOL(Id, Parent) \
1931 case SymExpr::Id##Kind: \
1932 DISPATCH(Id);
1933#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
1934 }
1935 llvm_unreachable("Unknown SymExpr kind!");
1936 }
1937
1938#define DEFAULT_ASSIGN(Id) \
1939 bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \
1940 return true; \
1941 } \
1942 bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
1943 bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
1944
1945 // When we dispatch for constraint types, we first try to check
1946 // if the new constraint is the constant and try the corresponding
1947 // assignor methods. If it didn't interrupt, we can proceed to the
1948 // range, and finally to the range set.
1949#define CONSTRAINT_DISPATCH(Id) \
1950 if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \
1951 ASSIGN(Id, Const, Sym, *Const); \
1952 } \
1953 if (Constraint.size() == 1) { \
1954 ASSIGN(Id, Range, Sym, *Constraint.begin()); \
1955 } \
1956 ASSIGN(Id, RangeSet, Sym, Constraint)
1957
1958 // Our internal assign method first tries to call assignor methods for all
1959 // constraint types that apply. And if not interrupted, continues with its
1960 // parent class.
1961#define SYMBOL(Id, Parent) \
1962 bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \
1963 CONSTRAINT_DISPATCH(Id); \
1964 DISPATCH(Parent); \
1965 } \
1966 DEFAULT_ASSIGN(Id)
1967#define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
1968#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
1969
1970 // Default implementations for the top class that doesn't have parents.
1971 bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
1972 CONSTRAINT_DISPATCH(SymExpr);
1973 return true;
1974 }
1975 DEFAULT_ASSIGN(SymExpr);
1976
1977#undef DISPATCH
1978#undef CONSTRAINT_DISPATCH
1979#undef DEFAULT_ASSIGN
1980#undef ASSIGN
1981};
1982
1983/// A little component aggregating all of the reasoning we have about
1984/// assigning new constraints to symbols.
1985///
1986/// The main purpose of this class is to associate constraints to symbols,
1987/// and impose additional constraints on other symbols, when we can imply
1988/// them.
1989///
1990/// It has a nice symmetry with SymbolicRangeInferrer. When the latter
1991/// can provide more precise ranges by looking into the operands of the
1992/// expression in question, ConstraintAssignor looks into the operands
1993/// to see if we can imply more from the new constraint.
1994class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
1995public:
1996 template <class ClassOrSymbol>
1997 [[nodiscard]] static ProgramStateRef
1998 assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
1999 ClassOrSymbol CoS, RangeSet NewConstraint) {
2000 if (!State || NewConstraint.isEmpty())
2001 return nullptr;
2002
2003 ConstraintAssignor Assignor{State, Builder, F};
2004 return Assignor.assign(CoS, NewConstraint);
2005 }
2006
2007 /// Handle expressions like: a % b != 0.
2008 template <typename SymT>
2009 bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
2010 if (Sym->getOpcode() != BO_Rem)
2011 return true;
2012 // a % b != 0 implies that a != 0.
2013 if (!Constraint.containsZero()) {
2014 SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
2015 if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
2016 State = State->assume(*NonLocSymSVal, true);
2017 if (!State)
2018 return false;
2019 }
2020 }
2021 return true;
2022 }
2023
2024 inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
2025 inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
2026 RangeSet Constraint) {
2027 return handleRemainderOp(Sym, Constraint);
2028 }
2029 inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2030 RangeSet Constraint);
2031
2032private:
2033 ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
2034 RangeSet::Factory &F)
2035 : State(State), Builder(Builder), RangeFactory(F) {}
2036 using Base = ConstraintAssignorBase<ConstraintAssignor>;
2037
2038 /// Base method for handling new constraints for symbols.
2039 [[nodiscard]] ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
2040 // All constraints are actually associated with equivalence classes, and
2041 // that's what we are going to do first.
2042 State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
2043 if (!State)
2044 return nullptr;
2045
2046 // And after that we can check what other things we can get from this
2047 // constraint.
2048 Base::assign(Sym, NewConstraint);
2049 return State;
2050 }
2051
2052 /// Base method for handling new constraints for classes.
2053 [[nodiscard]] ProgramStateRef assign(EquivalenceClass Class,
2054 RangeSet NewConstraint) {
2055 // There is a chance that we might need to update constraints for the
2056 // classes that are known to be disequal to Class.
2057 //
2058 // In order for this to be even possible, the new constraint should
2059 // be simply a constant because we can't reason about range disequalities.
2060 if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
2061
2062 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2063 ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
2064
2065 // Add new constraint.
2066 Constraints = CF.add(Constraints, Class, NewConstraint);
2067
2068 for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
2069 RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
2070 RangeFactory, State, DisequalClass);
2071
2072 UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
2073
2074 // If we end up with at least one of the disequal classes to be
2075 // constrained with an empty range-set, the state is infeasible.
2076 if (UpdatedConstraint.isEmpty())
2077 return nullptr;
2078
2079 Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
2080 }
2081 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2082 "a state with infeasible constraints");
2083
2084 return setConstraints(State, Constraints);
2085 }
2086
2087 return setConstraint(State, Class, NewConstraint);
2088 }
2089
2090 ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
2091 SymbolRef RHS) {
2092 return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
2093 }
2094
2095 ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
2096 SymbolRef RHS) {
2097 return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
2098 }
2099
2100 [[nodiscard]] Optional<bool> interpreteAsBool(RangeSet Constraint) {
2101 assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
2102
2103 if (Constraint.getConcreteValue())
2104 return !Constraint.getConcreteValue()->isZero();
2105
2106 if (!Constraint.containsZero())
2107 return true;
2108
2109 return llvm::None;
2110 }
2111
2112 ProgramStateRef State;
2113 SValBuilder &Builder;
2114 RangeSet::Factory &RangeFactory;
2115};
2116
2117
2118bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
2119 const llvm::APSInt &Constraint) {
2120 llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
2121 // Iterate over all equivalence classes and try to simplify them.
2122 ClassMembersTy Members = State->get<ClassMembers>();
2123 for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
2124 EquivalenceClass Class = ClassToSymbolSet.first;
2125 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2126 if (!State)
2127 return false;
2128 SimplifiedClasses.insert(Class);
2129 }
2130
2131 // Trivial equivalence classes (those that have only one symbol member) are
2132 // not stored in the State. Thus, we must skim through the constraints as
2133 // well. And we try to simplify symbols in the constraints.
2134 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2135 for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2136 EquivalenceClass Class = ClassConstraint.first;
2137 if (SimplifiedClasses.count(Class)) // Already simplified.
2138 continue;
2139 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2140 if (!State)
2141 return false;
2142 }
2143
2144 // We may have trivial equivalence classes in the disequality info as
2145 // well, and we need to simplify them.
2146 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2147 for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
2148 DisequalityInfo) {
2149 EquivalenceClass Class = DisequalityEntry.first;
2150 ClassSet DisequalClasses = DisequalityEntry.second;
2151 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2152 if (!State)
2153 return false;
2154 }
2155
2156 return true;
2157}
2158
2159bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2160 RangeSet Constraint) {
2161 if (!handleRemainderOp(Sym, Constraint))
2162 return false;
2163
2164 Optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
2165
2166 if (!ConstraintAsBool)
2167 return true;
2168
2169 if (Optional<bool> Equality = meansEquality(Sym)) {
2170 // Here we cover two cases:
2171 // * if Sym is equality and the new constraint is true -> Sym's operands
2172 // should be marked as equal
2173 // * if Sym is disequality and the new constraint is false -> Sym's
2174 // operands should be also marked as equal
2175 if (*Equality == *ConstraintAsBool) {
2176 State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
2177 } else {
2178 // Other combinations leave as with disequal operands.
2179 State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
2180 }
2181
2182 if (!State)
2183 return false;
2184 }
2185
2186 return true;
2187}
2188
2189} // end anonymous namespace
2190
2191std::unique_ptr<ConstraintManager>
2192ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
2193 ExprEngine *Eng) {
2194 return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
2195}
2196
2197ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
2198 ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
2199 ConstraintMap Result = F.getEmptyMap();
2200
2201 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2202 for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2203 EquivalenceClass Class = ClassConstraint.first;
2204 SymbolSet ClassMembers = Class.getClassMembers(State);
2205 assert(!ClassMembers.isEmpty() &&
2206 "Class must always have at least one member!");
2207
2208 SymbolRef Representative = *ClassMembers.begin();
2209 Result = F.add(Result, Representative, ClassConstraint.second);
2210 }
2211
2212 return Result;
2213}
2214
2215//===----------------------------------------------------------------------===//
// EquivalenceClass implementation details
2217//===----------------------------------------------------------------------===//
2218
2219LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
2220 raw_ostream &os) const {
2221 SymbolSet ClassMembers = getClassMembers(State);
2222 for (const SymbolRef &MemberSym : ClassMembers) {
2223 MemberSym->dump();
2224 os << "\n";
2225 }
2226}
2227
2228inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
2229 SymbolRef Sym) {
2230 assert(State && "State should not be null");
2231 assert(Sym && "Symbol should not be null");
2232 // We store far from all Symbol -> Class mappings
2233 if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
2234 return *NontrivialClass;
2235
2236 // This is a trivial class of Sym.
2237 return Sym;
2238}
2239
2240inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2241 ProgramStateRef State,
2242 SymbolRef First,
2243 SymbolRef Second) {
2244 EquivalenceClass FirstClass = find(State, First);
2245 EquivalenceClass SecondClass = find(State, Second);
2246
2247 return FirstClass.merge(F, State, SecondClass);
2248}
2249
2250inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2251 ProgramStateRef State,
2252 EquivalenceClass Other) {
2253 // It is already the same class.
2254 if (*this == Other)
2255 return State;
2256
2257 // FIXME: As of now, we support only equivalence classes of the same type.
2258 // This limitation is connected to the lack of explicit casts in
2259 // our symbolic expression model.
2260 //
2261 // That means that for `int x` and `char y` we don't distinguish
2262 // between these two very different cases:
2263 // * `x == y`
2264 // * `(char)x == y`
2265 //
2266 // The moment we introduce symbolic casts, this restriction can be
2267 // lifted.
2268 if (getType() != Other.getType())
2269 return State;
2270
2271 SymbolSet Members = getClassMembers(State);
2272 SymbolSet OtherMembers = Other.getClassMembers(State);
2273
2274 // We estimate the size of the class by the height of tree containing
2275 // its members. Merging is not a trivial operation, so it's easier to
2276 // merge the smaller class into the bigger one.
2277 if (Members.getHeight() >= OtherMembers.getHeight()) {
2278 return mergeImpl(F, State, Members, Other, OtherMembers);
2279 } else {
2280 return Other.mergeImpl(F, State, OtherMembers, *this, Members);
2281 }
2282}
2283
2284inline ProgramStateRef
2285EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
2286 ProgramStateRef State, SymbolSet MyMembers,
2287 EquivalenceClass Other, SymbolSet OtherMembers) {
2288 // Essentially what we try to recreate here is some kind of union-find
2289 // data structure. It does have certain limitations due to persistence
2290 // and the need to remove elements from classes.
2291 //
2292 // In this setting, EquialityClass object is the representative of the class
2293 // or the parent element. ClassMap is a mapping of class members to their
2294 // parent. Unlike the union-find structure, they all point directly to the
2295 // class representative because we don't have an opportunity to actually do
2296 // path compression when dealing with immutability. This means that we
2297 // compress paths every time we do merges. It also means that we lose
2298 // the main amortized complexity benefit from the original data structure.
2299 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2300 ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2301
2302 // 1. If the merged classes have any constraints associated with them, we
2303 // need to transfer them to the class we have left.
2304 //
2305 // Intersection here makes perfect sense because both of these constraints
2306 // must hold for the whole new class.
2307 if (Optional<RangeSet> NewClassConstraint =
2308 intersect(RangeFactory, getConstraint(State, *this),
2309 getConstraint(State, Other))) {
2310 // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
2311 // range inferrer shouldn't generate ranges incompatible with
2312 // equivalence classes. However, at the moment, due to imperfections
2313 // in the solver, it is possible and the merge function can also
2314 // return infeasible states aka null states.
2315 if (NewClassConstraint->isEmpty())
2316 // Infeasible state
2317 return nullptr;
2318
2319 // No need in tracking constraints of a now-dissolved class.
2320 Constraints = CRF.remove(Constraints, Other);
2321 // Assign new constraints for this class.
2322 Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
2323
2324 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2325 "a state with infeasible constraints");
2326
2327 State = State->set<ConstraintRange>(Constraints);
2328 }
2329
2330 // 2. Get ALL equivalence-related maps
2331 ClassMapTy Classes = State->get<ClassMap>();
2332 ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2333
2334 ClassMembersTy Members = State->get<ClassMembers>();
2335 ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
2336
2337 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2338 DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
2339
2340 ClassSet::Factory &CF = State->get_context<ClassSet>();
2341 SymbolSet::Factory &F = getMembersFactory(State);
2342
2343 // 2. Merge members of the Other class into the current class.
2344 SymbolSet NewClassMembers = MyMembers;
2345 for (SymbolRef Sym : OtherMembers) {
2346 NewClassMembers = F.add(NewClassMembers, Sym);
2347 // *this is now the class for all these new symbols.
2348 Classes = CMF.add(Classes, Sym, *this);
2349 }
2350
2351 // 3. Adjust member mapping.
2352 //
2353 // No need in tracking members of a now-dissolved class.
2354 Members = MF.remove(Members, Other);
2355 // Now only the current class is mapped to all the symbols.
2356 Members = MF.add(Members, *this, NewClassMembers);
2357
2358 // 4. Update disequality relations
2359 ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
2360 // We are about to merge two classes but they are already known to be
2361 // non-equal. This is a contradiction.
2362 if (DisequalToOther.contains(*this))
2363 return nullptr;
2364
2365 if (!DisequalToOther.isEmpty()) {
2366 ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
2367 DisequalityInfo = DF.remove(DisequalityInfo, Other);
2368
2369 for (EquivalenceClass DisequalClass : DisequalToOther) {
2370 DisequalToThis = CF.add(DisequalToThis, DisequalClass);
2371
2372 // Disequality is a symmetric relation meaning that if
2373 // DisequalToOther not null then the set for DisequalClass is not
2374 // empty and has at least Other.
2375 ClassSet OriginalSetLinkedToOther =
2376 *DisequalityInfo.lookup(DisequalClass);
2377
2378 // Other will be eliminated and we should replace it with the bigger
2379 // united class.
2380 ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
2381 NewSet = CF.add(NewSet, *this);
2382
2383 DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
2384 }
2385
2386 DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
2387 State = State->set<DisequalityMap>(DisequalityInfo);
2388 }
2389
2390 // 5. Update the state
2391 State = State->set<ClassMap>(Classes);
2392 State = State->set<ClassMembers>(Members);
2393
2394 return State;
2395}
2396
2397inline SymbolSet::Factory &
2398EquivalenceClass::getMembersFactory(ProgramStateRef State) {
2399 return State->get_context<SymbolSet>();
2400}
2401
2402SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
2403 if (const SymbolSet *Members = State->get<ClassMembers>(*this))
2404 return *Members;
2405
2406 // This class is trivial, so we need to construct a set
2407 // with just that one symbol from the class.
2408 SymbolSet::Factory &F = getMembersFactory(State);
2409 return F.add(F.getEmptySet(), getRepresentativeSymbol());
2410}
2411
2412bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
2413 return State->get<ClassMembers>(*this) == nullptr;
2414}
2415
2416bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
2417 SymbolReaper &Reaper) const {
2418 return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
2419}
2420
2421inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2422 ProgramStateRef State,
2423 SymbolRef First,
2424 SymbolRef Second) {
2425 return markDisequal(RF, State, find(State, First), find(State, Second));
2426}
2427
2428inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2429 ProgramStateRef State,
2430 EquivalenceClass First,
2431 EquivalenceClass Second) {
2432 return First.markDisequal(RF, State, Second);
2433}
2434
2435inline ProgramStateRef
2436EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
2437 EquivalenceClass Other) const {
2438 // If we know that two classes are equal, we can only produce an infeasible
2439 // state.
2440 if (*this == Other) {
2441 return nullptr;
2442 }
2443
2444 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2445 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2446
2447 // Disequality is a symmetric relation, so if we mark A as disequal to B,
2448 // we should also mark B as disequalt to A.
2449 if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
2450 Other) ||
2451 !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
2452 *this))
2453 return nullptr;
2454
2455 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2456 "a state with infeasible constraints");
2457
2458 State = State->set<DisequalityMap>(DisequalityInfo);
2459 State = State->set<ConstraintRange>(Constraints);
2460
2461 return State;
2462}
2463
2464inline bool EquivalenceClass::addToDisequalityInfo(
2465 DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
2466 RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
2467 EquivalenceClass Second) {
2468
2469 // 1. Get all of the required factories.
2470 DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
2471 ClassSet::Factory &CF = State->get_context<ClassSet>();
2472 ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2473
2474 // 2. Add Second to the set of classes disequal to First.
2475 const ClassSet *CurrentSet = Info.lookup(First);
2476 ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
2477 NewSet = CF.add(NewSet, Second);
2478
2479 Info = F.add(Info, First, NewSet);
2480
2481 // 3. If Second is known to be a constant, we can delete this point
2482 // from the constraint asociated with First.
2483 //
2484 // So, if Second == 10, it means that First != 10.
2485 // At the same time, the same logic does not apply to ranges.
2486 if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
2487 if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
2488
2489 RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
2490 RF, State, First.getRepresentativeSymbol());
2491
2492 FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
2493
2494 // If the First class is about to be constrained with an empty
2495 // range-set, the state is infeasible.
2496 if (FirstConstraint.isEmpty())
2497 return false;
2498
2499 Constraints = CRF.add(Constraints, First, FirstConstraint);
2500 }
2501
2502 return true;
2503}
2504
2505inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2506 SymbolRef FirstSym,
2507 SymbolRef SecondSym) {
2508 return EquivalenceClass::areEqual(State, find(State, FirstSym),
2509 find(State, SecondSym));
2510}
2511
2512inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2513 EquivalenceClass First,
2514 EquivalenceClass Second) {
2515 // The same equivalence class => symbols are equal.
2516 if (First == Second)
2517 return true;
2518
2519 // Let's check if we know anything about these two classes being not equal to
2520 // each other.
2521 ClassSet DisequalToFirst = First.getDisequalClasses(State);
2522 if (DisequalToFirst.contains(Second))
2523 return false;
2524
2525 // It is not clear.
2526 return llvm::None;
2527}
2528
2529[[nodiscard]] ProgramStateRef
2530EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
2531
2532 SymbolSet ClsMembers = getClassMembers(State);
2533 assert(ClsMembers.contains(Old));
2534
2535 // Remove `Old`'s Class->Sym relation.
2536 SymbolSet::Factory &F = getMembersFactory(State);
2537 ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2538 ClsMembers = F.remove(ClsMembers, Old);
2539 // Ensure another precondition of the removeMember function (we can check
2540 // this only with isEmpty, thus we have to do the remove first).
2541 assert(!ClsMembers.isEmpty() &&
2542 "Class should have had at least two members before member removal");
2543 // Overwrite the existing members assigned to this class.
2544 ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2545 ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
2546 State = State->set<ClassMembers>(ClassMembersMap);
2547
2548 // Remove `Old`'s Sym->Class relation.
2549 ClassMapTy Classes = State->get<ClassMap>();
2550 ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2551 Classes = CMF.remove(Classes, Old);
2552 State = State->set<ClassMap>(Classes);
2553
2554 return State;
2555}
2556
2557// Re-evaluate an SVal with top-level `State->assume` logic.
2558[[nodiscard]] ProgramStateRef
2559reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) {
2560 if (!Constraint)
2561 return State;
2562
2563 const auto DefinedVal = TheValue.castAs<DefinedSVal>();
2564
2565 // If the SVal is 0, we can simply interpret that as `false`.
2566 if (Constraint->encodesFalseRange())
2567 return State->assume(DefinedVal, false);
2568
2569 // If the constraint does not encode 0 then we can interpret that as `true`
2570 // AND as a Range(Set).
2571 if (Constraint->encodesTrueRange()) {
2572 State = State->assume(DefinedVal, true);
2573 if (!State)
2574 return nullptr;
2575 // Fall through, re-assume based on the range values as well.
2576 }
2577 // Overestimate the individual Ranges with the RangeSet' lowest and
2578 // highest values.
2579 return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
2580 Constraint->getMaxValue(), true);
2581}
2582
2583// Iterate over all symbols and try to simplify them. Once a symbol is
2584// simplified then we check if we can merge the simplified symbol's equivalence
2585// class to this class. This way, we simplify not just the symbols but the
2586// classes as well: we strive to keep the number of the classes to be the
2587// absolute minimum.
2588[[nodiscard]] ProgramStateRef
2589EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
2590 ProgramStateRef State, EquivalenceClass Class) {
2591 SymbolSet ClassMembers = Class.getClassMembers(State);
2592 for (const SymbolRef &MemberSym : ClassMembers) {
2593
2594 const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
2595 const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
2596
2597 // The symbol is collapsed to a constant, check if the current State is
2598 // still feasible.
2599 if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
2600 const llvm::APSInt &SV = CI->getValue();
2601 const RangeSet *ClassConstraint = getConstraint(State, Class);
2602 // We have found a contradiction.
2603 if (ClassConstraint && !ClassConstraint->contains(SV))
2604 return nullptr;
2605 }
2606
2607 if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
2608 // The simplified symbol should be the member of the original Class,
2609 // however, it might be in another existing class at the moment. We
2610 // have to merge these classes.
2611 ProgramStateRef OldState = State;
2612 State = merge(F, State, MemberSym, SimplifiedMemberSym);
2613 if (!State)
2614 return nullptr;
2615 // No state change, no merge happened actually.
2616 if (OldState == State)
2617 continue;
2618
2619 assert(find(State, MemberSym) == find(State, SimplifiedMemberSym));
2620 // Remove the old and more complex symbol.
2621 State = find(State, MemberSym).removeMember(State, MemberSym);
2622
2623 // Query the class constraint again b/c that may have changed during the
2624 // merge above.
2625 const RangeSet *ClassConstraint = getConstraint(State, Class);
2626
2627 // Re-evaluate an SVal with top-level `State->assume`, this ignites
2628 // a RECURSIVE algorithm that will reach a FIXPOINT.
2629 //
2630 // About performance and complexity: Let us assume that in a State we
2631 // have N non-trivial equivalence classes and that all constraints and
2632 // disequality info is related to non-trivial classes. In the worst case,
2633 // we can simplify only one symbol of one class in each iteration. The
2634 // number of symbols in one class cannot grow b/c we replace the old
2635 // symbol with the simplified one. Also, the number of the equivalence
2636 // classes can decrease only, b/c the algorithm does a merge operation
2637 // optionally. We need N iterations in this case to reach the fixpoint.
2638 // Thus, the steps needed to be done in the worst case is proportional to
2639 // N*N.
2640 //
2641 // This worst case scenario can be extended to that case when we have
2642 // trivial classes in the constraints and in the disequality map. This
2643 // case can be reduced to the case with a State where there are only
2644 // non-trivial classes. This is because a merge operation on two trivial
2645 // classes results in one non-trivial class.
2646 State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
2647 if (!State)
2648 return nullptr;
2649 }
2650 }
2651 return State;
2652}
2653
2654inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
2655 SymbolRef Sym) {
2656 return find(State, Sym).getDisequalClasses(State);
2657}
2658
2659inline ClassSet
2660EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
2661 return getDisequalClasses(State->get<DisequalityMap>(),
2662 State->get_context<ClassSet>());
2663}
2664
2665inline ClassSet
2666EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
2667 ClassSet::Factory &Factory) const {
2668 if (const ClassSet *DisequalClasses = Map.lookup(*this))
2669 return *DisequalClasses;
2670
2671 return Factory.getEmptySet();
2672}
2673
2674bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
2675 ClassMembersTy Members = State->get<ClassMembers>();
2676
2677 for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
2678 for (SymbolRef Member : ClassMembersPair.second) {
2679 // Every member of the class should have a mapping back to the class.
2680 if (find(State, Member) == ClassMembersPair.first) {
2681 continue;
2682 }
2683
2684 return false;
2685 }
2686 }
2687
2688 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2689 for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
2690 EquivalenceClass Class = DisequalityInfo.first;
2691 ClassSet DisequalClasses = DisequalityInfo.second;
2692
2693 // There is no use in keeping empty sets in the map.
2694 if (DisequalClasses.isEmpty())
2695 return false;
2696
2697 // Disequality is symmetrical, i.e. for every Class A and B that A != B,
2698 // B != A should also be true.
2699 for (EquivalenceClass DisequalClass : DisequalClasses) {
2700 const ClassSet *DisequalToDisequalClasses =
2701 Disequalities.lookup(DisequalClass);
2702
2703 // It should be a set of at least one element: Class
2704 if (!DisequalToDisequalClasses ||
2705 !DisequalToDisequalClasses->contains(Class))
2706 return false;
2707 }
2708 }
2709
2710 return true;
2711}
2712
2713//===----------------------------------------------------------------------===//
2714// RangeConstraintManager implementation
2715//===----------------------------------------------------------------------===//
2716
2717bool RangeConstraintManager::canReasonAbout(SVal X) const {
2718 Optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
2719 if (SymVal && SymVal->isExpression()) {
2720 const SymExpr *SE = SymVal->getSymbol();
2721
2722 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
2723 switch (SIE->getOpcode()) {
2724 // We don't reason yet about bitwise-constraints on symbolic values.
2725 case BO_And:
2726 case BO_Or:
2727 case BO_Xor:
2728 return false;
2729 // We don't reason yet about these arithmetic constraints on
2730 // symbolic values.
2731 case BO_Mul:
2732 case BO_Div:
2733 case BO_Rem:
2734 case BO_Shl:
2735 case BO_Shr:
2736 return false;
2737 // All other cases.
2738 default:
2739 return true;
2740 }
2741 }
2742
2743 if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
2744 // FIXME: Handle <=> here.
2745 if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
2746 BinaryOperator::isRelationalOp(SSE->getOpcode())) {
2747 // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
2748 // We've recently started producing Loc <> NonLoc comparisons (that
2749 // result from casts of one of the operands between eg. intptr_t and
2750 // void *), but we can't reason about them yet.
2751 if (Loc::isLocType(SSE->getLHS()->getType())) {
2752 return Loc::isLocType(SSE->getRHS()->getType());
2753 }
2754 }
2755 }
2756
2757 return false;
2758 }
2759
2760 return true;
2761}
2762
2763ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
2764 SymbolRef Sym) {
2765 const RangeSet *Ranges = getConstraint(State, Sym);
2766
2767 // If we don't have any information about this symbol, it's underconstrained.
2768 if (!Ranges)
2769 return ConditionTruthVal();
2770
2771 // If we have a concrete value, see if it's zero.
2772 if (const llvm::APSInt *Value = Ranges->getConcreteValue())
2773 return *Value == 0;
2774
2775 BasicValueFactory &BV = getBasicVals();
2776 APSIntType IntType = BV.getAPSIntType(Sym->getType());
2777 llvm::APSInt Zero = IntType.getZeroValue();
2778
2779 // Check if zero is in the set of possible values.
2780 if (!Ranges->contains(Zero))
2781 return false;
2782
2783 // Zero is a possible value, but it is not the /only/ possible value.
2784 return ConditionTruthVal();
2785}
2786
2787const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
2788 SymbolRef Sym) const {
2789 const RangeSet *T = getConstraint(St, Sym);
2790 return T ? T->getConcreteValue() : nullptr;
2791}
2792
2793//===----------------------------------------------------------------------===//
2794// Remove dead symbols from existing constraints
2795//===----------------------------------------------------------------------===//
2796
2797/// Scan all symbols referenced by the constraints. If the symbol is not alive
2798/// as marked in LSymbols, mark it as dead in DSymbols.
2799ProgramStateRef
2800RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
2801 SymbolReaper &SymReaper) {
2802 ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2803 ClassMembersTy NewClassMembersMap = ClassMembersMap;
2804 ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2805 SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
2806
2807 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2808 ConstraintRangeTy NewConstraints = Constraints;
2809 ConstraintRangeTy::Factory &ConstraintFactory =
2810 State->get_context<ConstraintRange>();
2811
2812 ClassMapTy Map = State->get<ClassMap>();
2813 ClassMapTy NewMap = Map;
2814 ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
2815
2816 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2817 DisequalityMapTy::Factory &DisequalityFactory =
2818 State->get_context<DisequalityMap>();
2819 ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
2820
2821 bool ClassMapChanged = false;
2822 bool MembersMapChanged = false;
2823 bool ConstraintMapChanged = false;
2824 bool DisequalitiesChanged = false;
2825
2826 auto removeDeadClass = [&](EquivalenceClass Class) {
2827 // Remove associated constraint ranges.
2828 Constraints = ConstraintFactory.remove(Constraints, Class);
2829 ConstraintMapChanged = true;
2830
2831 // Update disequality information to not hold any information on the
2832 // removed class.
2833 ClassSet DisequalClasses =
2834 Class.getDisequalClasses(Disequalities, ClassSetFactory);
2835 if (!DisequalClasses.isEmpty()) {
2836 for (EquivalenceClass DisequalClass : DisequalClasses) {
2837 ClassSet DisequalToDisequalSet =
2838 DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
2839 // DisequalToDisequalSet is guaranteed to be non-empty for consistent
2840 // disequality info.
2841 assert(!DisequalToDisequalSet.isEmpty());
2842 ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
2843
2844 // No need in keeping an empty set.
2845 if (NewSet.isEmpty()) {
2846 Disequalities =
2847 DisequalityFactory.remove(Disequalities, DisequalClass);
2848 } else {
2849 Disequalities =
2850 DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
2851 }
2852 }
2853 // Remove the data for the class
2854 Disequalities = DisequalityFactory.remove(Disequalities, Class);
2855 DisequalitiesChanged = true;
2856 }
2857 };
2858
2859 // 1. Let's see if dead symbols are trivial and have associated constraints.
2860 for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
2861 Constraints) {
2862 EquivalenceClass Class = ClassConstraintPair.first;
2863 if (Class.isTriviallyDead(State, SymReaper)) {
2864 // If this class is trivial, we can remove its constraints right away.
2865 removeDeadClass(Class);
2866 }
2867 }
2868
2869 // 2. We don't need to track classes for dead symbols.
2870 for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
2871 SymbolRef Sym = SymbolClassPair.first;
2872
2873 if (SymReaper.isDead(Sym)) {
2874 ClassMapChanged = true;
2875 NewMap = ClassFactory.remove(NewMap, Sym);
2876 }
2877 }
2878
2879 // 3. Remove dead members from classes and remove dead non-trivial classes
2880 // and their constraints.
2881 for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
2882 ClassMembersMap) {
2883 EquivalenceClass Class = ClassMembersPair.first;
2884 SymbolSet LiveMembers = ClassMembersPair.second;
2885 bool MembersChanged = false;
2886
2887 for (SymbolRef Member : ClassMembersPair.second) {
2888 if (SymReaper.isDead(Member)) {
2889 MembersChanged = true;
2890 LiveMembers = SetFactory.remove(LiveMembers, Member);
2891 }
2892 }
2893
2894 // Check if the class changed.
2895 if (!MembersChanged)
2896 continue;
2897
2898 MembersMapChanged = true;
2899
2900 if (LiveMembers.isEmpty()) {
2901 // The class is dead now, we need to wipe it out of the members map...
2902 NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
2903
2904 // ...and remove all of its constraints.
2905 removeDeadClass(Class);
2906 } else {
2907 // We need to change the members associated with the class.
2908 NewClassMembersMap =
2909 EMFactory.add(NewClassMembersMap, Class, LiveMembers);
2910 }
2911 }
2912
2913 // 4. Update the state with new maps.
2914 //
2915 // Here we try to be humble and update a map only if it really changed.
2916 if (ClassMapChanged)
2917 State = State->set<ClassMap>(NewMap);
2918
2919 if (MembersMapChanged)
2920 State = State->set<ClassMembers>(NewClassMembersMap);
2921
2922 if (ConstraintMapChanged)
2923 State = State->set<ConstraintRange>(Constraints);
2924
2925 if (DisequalitiesChanged)
2926 State = State->set<DisequalityMap>(Disequalities);
2927
2928 assert(EquivalenceClass::isClassDataConsistent(State));
2929
2930 return State;
2931}
2932
2933RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
2934 SymbolRef Sym) {
2935 return SymbolicRangeInferrer::inferRange(F, State, Sym);
2936}
2937
2938ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
2939 SymbolRef Sym,
2940 RangeSet Range) {
2941 return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
2942}
2943
//===----------------------------------------------------------------------===//
// assumeSymX methods: protected interface for RangeConstraintManager.
//===----------------------------------------------------------------------===//
2947
2948// The syntax for ranges below is mathematical, using [x, y] for closed ranges
2949// and (x, y) for open ranges. These ranges are modular, corresponding with
2950// a common treatment of C integer overflow. This means that these methods
2951// do not have to worry about overflow; RangeSet::Intersect can handle such a
2952// "wraparound" range.
2953// As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
2954// UINT_MAX, 0, 1, and 2.
2955
2956ProgramStateRef
2957RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
2958 const llvm::APSInt &Int,
2959 const llvm::APSInt &Adjustment) {
2960 // Before we do any real work, see if the value can even show up.
2961 APSIntType AdjustmentType(Adjustment);
2962 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
2963 return St;
2964
2965 llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
2966 RangeSet New = getRange(St, Sym);
2967 New = F.deletePoint(New, Point);
2968
2969 return setRange(St, Sym, New);
2970}
2971
2972ProgramStateRef
2973RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
2974 const llvm::APSInt &Int,
2975 const llvm::APSInt &Adjustment) {
2976 // Before we do any real work, see if the value can even show up.
2977 APSIntType AdjustmentType(Adjustment);
2978 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
2979 return nullptr;
2980
2981 // [Int-Adjustment, Int-Adjustment]
2982 llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
2983 RangeSet New = getRange(St, Sym);
2984 New = F.intersect(New, AdjInt);
2985
2986 return setRange(St, Sym, New);
2987}
2988
2989RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
2990 SymbolRef Sym,
2991 const llvm::APSInt &Int,
2992 const llvm::APSInt &Adjustment) {
2993 // Before we do any real work, see if the value can even show up.
2994 APSIntType AdjustmentType(Adjustment);
2995 switch (AdjustmentType.testInRange(Int, true)) {
2996 case APSIntType::RTR_Below:
2997 return F.getEmptySet();
2998 case APSIntType::RTR_Within:
2999 break;
3000 case APSIntType::RTR_Above:
3001 return getRange(St, Sym);
3002 }
3003
3004 // Special case for Int == Min. This is always false.
3005 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3006 llvm::APSInt Min = AdjustmentType.getMinValue();
3007 if (ComparisonVal == Min)
3008 return F.getEmptySet();
3009
3010 llvm::APSInt Lower = Min - Adjustment;
3011 llvm::APSInt Upper = ComparisonVal - Adjustment;
3012 --Upper;
3013
3014 RangeSet Result = getRange(St, Sym);
3015 return F.intersect(Result, Lower, Upper);
3016}
3017
3018ProgramStateRef
3019RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
3020 const llvm::APSInt &Int,
3021 const llvm::APSInt &Adjustment) {
3022 RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
3023 return setRange(St, Sym, New);
3024}
3025
3026RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
3027 SymbolRef Sym,
3028 const llvm::APSInt &Int,
3029 const llvm::APSInt &Adjustment) {
3030 // Before we do any real work, see if the value can even show up.
3031 APSIntType AdjustmentType(Adjustment);
3032 switch (AdjustmentType.testInRange(Int, true)) {
3033 case APSIntType::RTR_Below:
3034 return getRange(St, Sym);
3035 case APSIntType::RTR_Within:
3036 break;
3037 case APSIntType::RTR_Above:
3038 return F.getEmptySet();
3039 }
3040
3041 // Special case for Int == Max. This is always false.
3042 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3043 llvm::APSInt Max = AdjustmentType.getMaxValue();
3044 if (ComparisonVal == Max)
3045 return F.getEmptySet();
3046
3047 llvm::APSInt Lower = ComparisonVal - Adjustment;
3048 llvm::APSInt Upper = Max - Adjustment;
3049 ++Lower;
3050
3051 RangeSet SymRange = getRange(St, Sym);
3052 return F.intersect(SymRange, Lower, Upper);
3053}
3054
3055ProgramStateRef
3056RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
3057 const llvm::APSInt &Int,
3058 const llvm::APSInt &Adjustment) {
3059 RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
3060 return setRange(St, Sym, New);
3061}
3062
3063RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
3064 SymbolRef Sym,
3065 const llvm::APSInt &Int,
3066 const llvm::APSInt &Adjustment) {
3067 // Before we do any real work, see if the value can even show up.
3068 APSIntType AdjustmentType(Adjustment);
3069 switch (AdjustmentType.testInRange(Int, true)) {
3070 case APSIntType::RTR_Below:
3071 return getRange(St, Sym);
3072 case APSIntType::RTR_Within:
3073 break;
3074 case APSIntType::RTR_Above:
3075 return F.getEmptySet();
3076 }
3077
3078 // Special case for Int == Min. This is always feasible.
3079 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3080 llvm::APSInt Min = AdjustmentType.getMinValue();
3081 if (ComparisonVal == Min)
3082 return getRange(St, Sym);
3083
3084 llvm::APSInt Max = AdjustmentType.getMaxValue();
3085 llvm::APSInt Lower = ComparisonVal - Adjustment;
3086 llvm::APSInt Upper = Max - Adjustment;
3087
3088 RangeSet SymRange = getRange(St, Sym);
3089 return F.intersect(SymRange, Lower, Upper);
3090}
3091
3092ProgramStateRef
3093RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
3094 const llvm::APSInt &Int,
3095 const llvm::APSInt &Adjustment) {
3096 RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
3097 return setRange(St, Sym, New);
3098}
3099
3100RangeSet
3101RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
3102 const llvm::APSInt &Int,
3103 const llvm::APSInt &Adjustment) {
3104 // Before we do any real work, see if the value can even show up.
3105 APSIntType AdjustmentType(Adjustment);
3106 switch (AdjustmentType.testInRange(Int, true)) {
3107 case APSIntType::RTR_Below:
3108 return F.getEmptySet();
3109 case APSIntType::RTR_Within:
3110 break;
3111 case APSIntType::RTR_Above:
3112 return RS();
3113 }
3114
3115 // Special case for Int == Max. This is always feasible.
3116 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3117 llvm::APSInt Max = AdjustmentType.getMaxValue();
3118 if (ComparisonVal == Max)
3119 return RS();
3120
3121 llvm::APSInt Min = AdjustmentType.getMinValue();
3122 llvm::APSInt Lower = Min - Adjustment;
3123 llvm::APSInt Upper = ComparisonVal - Adjustment;
3124
3125 RangeSet Default = RS();
3126 return F.intersect(Default, Lower, Upper);
3127}
3128
3129RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
3130 SymbolRef Sym,
3131 const llvm::APSInt &Int,
3132 const llvm::APSInt &Adjustment) {
3133 return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
3134}
3135
3136ProgramStateRef
3137RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
3138 const llvm::APSInt &Int,
3139 const llvm::APSInt &Adjustment) {
3140 RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
3141 return setRange(St, Sym, New);
3142}
3143
3144ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
3145 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3146 const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3147 RangeSet New = getSymGERange(State, Sym, From, Adjustment);
3148 if (New.isEmpty())
3149 return nullptr;
3150 RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
3151 return setRange(State, Sym, Out);
3152}
3153
3154ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
3155 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3156 const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3157 RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
3158 RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
3159 RangeSet New(F.add(RangeLT, RangeGT));
3160 return setRange(State, Sym, New);
3161}
3162
3163//===----------------------------------------------------------------------===//
3164// Pretty-printing.
3165//===----------------------------------------------------------------------===//
3166
3167void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
3168 const char *NL, unsigned int Space,
3169 bool IsDot) const {
3170 printConstraints(Out, State, NL, Space, IsDot);
3171 printEquivalenceClasses(Out, State, NL, Space, IsDot);
3172 printDisequalities(Out, State, NL, Space, IsDot);
3173}
3174
3175void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
3176 SymbolRef Sym) {
3177 const RangeSet RS = getRange(State, Sym);
3178 Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
3179 RS.dump(Out);
3180}
3181
3182static std::string toString(const SymbolRef &Sym) {
3183 std::string S;
3184 llvm::raw_string_ostream O(S);
3185 Sym->dumpToStream(O);
3186 return O.str();
3187}
3188
3189void RangeConstraintManager::printConstraints(raw_ostream &Out,
3190 ProgramStateRef State,
3191 const char *NL,
3192 unsigned int Space,
3193 bool IsDot) const {
3194 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
3195
3196 Indent(Out, Space, IsDot) << "\"constraints\": ";
3197 if (Constraints.isEmpty()) {
3198 Out << "null," << NL;
3199 return;
3200 }
3201
3202 std::map<std::string, RangeSet> OrderedConstraints;
3203 for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
3204 SymbolSet ClassMembers = P.first.getClassMembers(State);
3205 for (const SymbolRef &ClassMember : ClassMembers) {
3206 bool insertion_took_place;
3207 std::tie(std::ignore, insertion_took_place) =
3208 OrderedConstraints.insert({toString(ClassMember), P.second});
3209 assert(insertion_took_place &&
3210 "two symbols should not have the same dump");
3211 }
3212 }
3213
3214 ++Space;
3215 Out << '[' << NL;
3216 bool First = true;
3217 for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
3218 if (First) {
3219 First = false;
3220 } else {
3221 Out << ',';
3222 Out << NL;
3223 }
3224 Indent(Out, Space, IsDot)
3225 << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
3226 P.second.dump(Out);
3227 Out << "\" }";
3228 }
3229 Out << NL;
3230
3231 --Space;
3232 Indent(Out, Space, IsDot) << "]," << NL;
3233}
3234
3235static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
3236 SymbolSet ClassMembers = Class.getClassMembers(State);
3237 llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
3238 ClassMembers.end());
3239 llvm::sort(ClassMembersSorted,
3240 [](const SymbolRef &LHS, const SymbolRef &RHS) {
3241 return toString(LHS) < toString(RHS);
3242 });
3243
3244 bool FirstMember = true;
3245
3246 std::string Str;
3247 llvm::raw_string_ostream Out(Str);
3248 Out << "[ ";
3249 for (SymbolRef ClassMember : ClassMembersSorted) {
3250 if (FirstMember)
3251 FirstMember = false;
3252 else
3253 Out << ", ";
3254 Out << "\"" << ClassMember << "\"";
3255 }
3256 Out << " ]";
3257 return Out.str();
3258}
3259
3260void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
3261 ProgramStateRef State,
3262 const char *NL,
3263 unsigned int Space,
3264 bool IsDot) const {
3265 ClassMembersTy Members = State->get<ClassMembers>();
3266
3267 Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
3268 if (Members.isEmpty()) {
3269 Out << "null," << NL;
3270 return;
3271 }
3272
3273 std::set<std::string> MembersStr;
3274 for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
3275 MembersStr.insert(toString(State, ClassToSymbolSet.first));
3276
3277 ++Space;
3278 Out << '[' << NL;
3279 bool FirstClass = true;
3280 for (const std::string &Str : MembersStr) {
3281 if (FirstClass) {
3282 FirstClass = false;
3283 } else {
3284 Out << ',';
3285 Out << NL;
3286 }
3287 Indent(Out, Space, IsDot);
3288 Out << Str;
3289 }
3290 Out << NL;
3291
3292 --Space;
3293 Indent(Out, Space, IsDot) << "]," << NL;
3294}
3295
3296void RangeConstraintManager::printDisequalities(raw_ostream &Out,
3297 ProgramStateRef State,
3298 const char *NL,
3299 unsigned int Space,
3300 bool IsDot) const {
3301 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
3302
3303 Indent(Out, Space, IsDot) << "\"disequality_info\": ";
3304 if (Disequalities.isEmpty()) {
3305 Out << "null," << NL;
3306 return;
3307 }
3308
3309 // Transform the disequality info to an ordered map of
3310 // [string -> (ordered set of strings)]
3311 using EqClassesStrTy = std::set<std::string>;
3312 using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
3313 DisequalityInfoStrTy DisequalityInfoStr;
3314 for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
3315 EquivalenceClass Class = ClassToDisEqSet.first;
3316 ClassSet DisequalClasses = ClassToDisEqSet.second;
3317 EqClassesStrTy MembersStr;
3318 for (EquivalenceClass DisEqClass : DisequalClasses)
3319 MembersStr.insert(toString(State, DisEqClass));
3320 DisequalityInfoStr.insert({toString(State, Class), MembersStr});
3321 }
3322
3323 ++Space;
3324 Out << '[' << NL;
3325 bool FirstClass = true;
3326 for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
3327 DisequalityInfoStr) {
3328 const std::string &Class = ClassToDisEqSet.first;
3329 if (FirstClass) {
3330 FirstClass = false;
3331 } else {
3332 Out << ',';
3333 Out << NL;
3334 }
3335 Indent(Out, Space, IsDot) << "{" << NL;
3336 unsigned int DisEqSpace = Space + 1;
3337 Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
3338 Out << Class;
3339 const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
3340 if (!DisequalClasses.empty()) {
3341 Out << "," << NL;
3342 Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
3343 unsigned int DisEqClassSpace = DisEqSpace + 1;
3344 Indent(Out, DisEqClassSpace, IsDot);
3345 bool FirstDisEqClass = true;
3346 for (const std::string &DisEqClass : DisequalClasses) {
3347 if (FirstDisEqClass) {
3348 FirstDisEqClass = false;
3349 } else {
3350 Out << ',' << NL;
3351 Indent(Out, DisEqClassSpace, IsDot);
3352 }
3353 Out << DisEqClass;
3354 }
3355 Out << "]" << NL;
3356 }
3357 Indent(Out, Space, IsDot) << "}";
3358 }
3359 Out << NL;
3360
3361 --Space;
3362 Indent(Out, Space, IsDot) << "]," << NL;
3363}
3364

source code of clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp