1// SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h"
14#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
15#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
16#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
18#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
19#include <optional>
20
21using namespace clang;
22using namespace ento;
23
24namespace {
25class SimpleSValBuilder : public SValBuilder {
26
27 // Query the constraint manager whether the SVal has only one possible
28 // (integer) value. If that is the case, the value is returned. Otherwise,
29 // returns NULL.
30 // This is an implementation detail. Checkers should use `getKnownValue()`
31 // instead.
32 static const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V);
33
34 // Helper function that returns the value stored in a nonloc::ConcreteInt or
35 // loc::ConcreteInt.
36 static const llvm::APSInt *getConcreteValue(SVal V);
37
38 // With one `simplifySValOnce` call, a compound symbols might collapse to
39 // simpler symbol tree that is still possible to further simplify. Thus, we
40 // do the simplification on a new symbol tree until we reach the simplest
41 // form, i.e. the fixpoint.
42 // Consider the following symbol `(b * b) * b * b` which has this tree:
43 // *
44 // / \
45 // * b
46 // / \
47 // / b
48 // (b * b)
49 // Now, if the `b * b == 1` new constraint is added then during the first
50 // iteration we have the following transformations:
51 // * *
52 // / \ / \
53 // * b --> b b
54 // / \
55 // / b
56 // 1
57 // We need another iteration to reach the final result `1`.
58 SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
59
60 // Recursively descends into symbolic expressions and replaces symbols
61 // with their known values (in the sense of the getConstValue() method).
62 // We traverse the symbol tree and query the constraint values for the
63 // sub-trees and if a value is a constant we do the constant folding.
64 SVal simplifySValOnce(ProgramStateRef State, SVal V);
65
66public:
67 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
68 ProgramStateManager &stateMgr)
69 : SValBuilder(alloc, context, stateMgr) {}
70 ~SimpleSValBuilder() override {}
71
72 SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op,
73 NonLoc lhs, NonLoc rhs, QualType resultTy) override;
74 SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op,
75 Loc lhs, Loc rhs, QualType resultTy) override;
76 SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op,
77 Loc lhs, NonLoc rhs, QualType resultTy) override;
78
79 /// Evaluates a given SVal by recursively evaluating and
80 /// simplifying the children SVals. If the SVal has only one possible
81 /// (integer) value, that value is returned. Otherwise, returns NULL.
82 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
83
84 /// Evaluates a given SVal by recursively evaluating and simplifying the
85 /// children SVals, then returns its minimal possible (integer) value. If the
86 /// constraint manager cannot provide a meaningful answer, this returns NULL.
87 const llvm::APSInt *getMinValue(ProgramStateRef state, SVal V) override;
88
89 /// Evaluates a given SVal by recursively evaluating and simplifying the
90 /// children SVals, then returns its maximal possible (integer) value. If the
91 /// constraint manager cannot provide a meaningful answer, this returns NULL.
92 const llvm::APSInt *getMaxValue(ProgramStateRef state, SVal V) override;
93
94 SVal simplifySVal(ProgramStateRef State, SVal V) override;
95
96 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
97 const llvm::APSInt &RHS, QualType resultTy);
98};
99} // end anonymous namespace
100
101SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
102 ASTContext &context,
103 ProgramStateManager &stateMgr) {
104 return new SimpleSValBuilder(alloc, context, stateMgr);
105}
106
107// Checks if the negation the value and flipping sign preserve
108// the semantics on the operation in the resultType
109static bool isNegationValuePreserving(const llvm::APSInt &Value,
110 APSIntType ResultType) {
111 const unsigned ValueBits = Value.getSignificantBits();
112 if (ValueBits == ResultType.getBitWidth()) {
113 // The value is the lowest negative value that is representable
114 // in signed integer with bitWith of result type. The
115 // negation is representable if resultType is unsigned.
116 return ResultType.isUnsigned();
117 }
118
119 // If resultType bitWith is higher that number of bits required
120 // to represent RHS, the sign flip produce same value.
121 return ValueBits < ResultType.getBitWidth();
122}
123
124//===----------------------------------------------------------------------===//
125// Transfer function for binary operators.
126//===----------------------------------------------------------------------===//
127
128SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
129 BinaryOperator::Opcode op,
130 const llvm::APSInt &RHS,
131 QualType resultTy) {
132 bool isIdempotent = false;
133
134 // Check for a few special cases with known reductions first.
135 switch (op) {
136 default:
137 // We can't reduce this case; just treat it normally.
138 break;
139 case BO_Mul:
140 // a*0 and a*1
141 if (RHS == 0)
142 return makeIntVal(0, resultTy);
143 else if (RHS == 1)
144 isIdempotent = true;
145 break;
146 case BO_Div:
147 // a/0 and a/1
148 if (RHS == 0)
149 // This is also handled elsewhere.
150 return UndefinedVal();
151 else if (RHS == 1)
152 isIdempotent = true;
153 break;
154 case BO_Rem:
155 // a%0 and a%1
156 if (RHS == 0)
157 // This is also handled elsewhere.
158 return UndefinedVal();
159 else if (RHS == 1)
160 return makeIntVal(0, resultTy);
161 break;
162 case BO_Add:
163 case BO_Sub:
164 case BO_Shl:
165 case BO_Shr:
166 case BO_Xor:
167 // a+0, a-0, a<<0, a>>0, a^0
168 if (RHS == 0)
169 isIdempotent = true;
170 break;
171 case BO_And:
172 // a&0 and a&(~0)
173 if (RHS == 0)
174 return makeIntVal(0, resultTy);
175 else if (RHS.isAllOnes())
176 isIdempotent = true;
177 break;
178 case BO_Or:
179 // a|0 and a|(~0)
180 if (RHS == 0)
181 isIdempotent = true;
182 else if (RHS.isAllOnes()) {
183 return nonloc::ConcreteInt(BasicVals.Convert(T: resultTy, From: RHS));
184 }
185 break;
186 }
187
188 // Idempotent ops (like a*1) can still change the type of an expression.
189 // Wrap the LHS up in a NonLoc again and let evalCast do the
190 // dirty work.
191 if (isIdempotent)
192 return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
193
194 // If we reach this point, the expression cannot be simplified.
195 // Make a SymbolVal for the entire expression, after converting the RHS.
196 std::optional<APSIntPtr> ConvertedRHS = BasicVals.getValue(X: RHS);
197 if (BinaryOperator::isComparisonOp(Opc: op)) {
198 // We're looking for a type big enough to compare the symbolic value
199 // with the given constant.
200 // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
201 ASTContext &Ctx = getContext();
202 QualType SymbolType = LHS->getType();
203 uint64_t ValWidth = RHS.getBitWidth();
204 uint64_t TypeWidth = Ctx.getTypeSize(T: SymbolType);
205
206 if (ValWidth < TypeWidth) {
207 // If the value is too small, extend it.
208 ConvertedRHS = BasicVals.Convert(T: SymbolType, From: RHS);
209 } else if (ValWidth == TypeWidth) {
210 // If the value is signed but the symbol is unsigned, do the comparison
211 // in unsigned space. [C99 6.3.1.8]
212 // (For the opposite case, the value is already unsigned.)
213 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
214 ConvertedRHS = BasicVals.Convert(T: SymbolType, From: RHS);
215 }
216 } else if (BinaryOperator::isAdditiveOp(Opc: op) && RHS.isNegative()) {
217 // Change a+(-N) into a-N, and a-(-N) into a+N
218 // Adjust addition/subtraction of negative value, to
219 // subtraction/addition of the negated value.
220 APSIntType resultIntTy = BasicVals.getAPSIntType(T: resultTy);
221 if (isNegationValuePreserving(Value: RHS, ResultType: resultIntTy)) {
222 ConvertedRHS = BasicVals.getValue(X: -resultIntTy.convert(Value: RHS));
223 op = (op == BO_Add) ? BO_Sub : BO_Add;
224 } else {
225 ConvertedRHS = BasicVals.Convert(T: resultTy, From: RHS);
226 }
227 } else
228 ConvertedRHS = BasicVals.Convert(T: resultTy, From: RHS);
229
230 return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
231}
232
233// See if Sym is known to be a relation Rel with Bound.
234static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym,
235 llvm::APSInt Bound, ProgramStateRef State) {
236 SValBuilder &SVB = State->getStateManager().getSValBuilder();
237 BasicValueFactory &BV = SVB.getBasicValueFactory();
238 SVal Result = SVB.evalBinOpNN(state: State, op: Rel, lhs: nonloc::SymbolVal(Sym),
239 rhs: nonloc::ConcreteInt(BV.getValue(X: Bound)),
240 resultTy: SVB.getConditionType());
241 if (auto DV = Result.getAs<DefinedSVal>()) {
242 return !State->assume(Cond: *DV, Assumption: false);
243 }
244 return false;
245}
246
247// See if Sym is known to be within [min/4, max/4], where min and max
248// are the bounds of the symbol's integral type. With such symbols,
249// some manipulations can be performed without the risk of overflow.
250// assume() doesn't cause infinite recursion because we should be dealing
251// with simpler symbols on every recursive call.
252static bool isWithinConstantOverflowBounds(SymbolRef Sym,
253 ProgramStateRef State) {
254 SValBuilder &SVB = State->getStateManager().getSValBuilder();
255 BasicValueFactory &BV = SVB.getBasicValueFactory();
256
257 QualType T = Sym->getType();
258 assert(T->isSignedIntegerOrEnumerationType() &&
259 "This only works with signed integers!");
260 APSIntType AT = BV.getAPSIntType(T);
261
262 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(RawValue: 4), Min = -Max;
263 return isInRelation(Rel: BO_LE, Sym, Bound: Max, State) &&
264 isInRelation(Rel: BO_GE, Sym, Bound: Min, State);
265}
266
267// Same for the concrete integers: see if I is within [min/4, max/4].
268static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
269 APSIntType AT(I);
270 assert(!AT.isUnsigned() &&
271 "This only works with signed integers!");
272
273 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(RawValue: 4);
274 return (I <= Max) && (I >= -Max);
275}
276
277static std::pair<SymbolRef, APSIntPtr> decomposeSymbol(SymbolRef Sym,
278 BasicValueFactory &BV) {
279 if (const auto *SymInt = dyn_cast<SymIntExpr>(Val: Sym))
280 if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
281 return std::make_pair(SymInt->getLHS(),
282 (SymInt->getOpcode() == BO_Add)
283 ? BV.getValue(X: SymInt->getRHS())
284 : BV.getValue(X: -SymInt->getRHS()));
285
286 // Fail to decompose: "reduce" the problem to the "$x + 0" case.
287 return std::make_pair(x&: Sym, y: BV.getValue(X: 0, T: Sym->getType()));
288}
289
290// Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
291// same signed integral type and no overflows occur (which should be checked
292// by the caller).
293static NonLoc doRearrangeUnchecked(ProgramStateRef State,
294 BinaryOperator::Opcode Op,
295 SymbolRef LSym, llvm::APSInt LInt,
296 SymbolRef RSym, llvm::APSInt RInt) {
297 SValBuilder &SVB = State->getStateManager().getSValBuilder();
298 BasicValueFactory &BV = SVB.getBasicValueFactory();
299 SymbolManager &SymMgr = SVB.getSymbolManager();
300
301 QualType SymTy = LSym->getType();
302 assert(SymTy == RSym->getType() &&
303 "Symbols are not of the same type!");
304 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
305 "Integers are not of the same type as symbols!");
306 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
307 "Integers are not of the same type as symbols!");
308
309 QualType ResultTy;
310 if (BinaryOperator::isComparisonOp(Opc: Op))
311 ResultTy = SVB.getConditionType();
312 else if (BinaryOperator::isAdditiveOp(Opc: Op))
313 ResultTy = SymTy;
314 else
315 llvm_unreachable("Operation not suitable for unchecked rearrangement!");
316
317 if (LSym == RSym)
318 return SVB
319 .evalBinOpNN(state: State, op: Op, lhs: nonloc::ConcreteInt(BV.getValue(X: LInt)),
320 rhs: nonloc::ConcreteInt(BV.getValue(X: RInt)), resultTy: ResultTy)
321 .castAs<NonLoc>();
322
323 SymbolRef ResultSym = nullptr;
324 BinaryOperator::Opcode ResultOp;
325 llvm::APSInt ResultInt;
326 if (BinaryOperator::isComparisonOp(Opc: Op)) {
327 // Prefer comparing to a non-negative number.
328 // FIXME: Maybe it'd be better to have consistency in
329 // "$x - $y" vs. "$y - $x" because those are solver's keys.
330 if (LInt > RInt) {
331 ResultSym = SymMgr.acquire<SymSymExpr>(args&: RSym, args: BO_Sub, args&: LSym, args&: SymTy);
332 ResultOp = BinaryOperator::reverseComparisonOp(Opc: Op);
333 ResultInt = LInt - RInt; // Opposite order!
334 } else {
335 ResultSym = SymMgr.acquire<SymSymExpr>(args&: LSym, args: BO_Sub, args&: RSym, args&: SymTy);
336 ResultOp = Op;
337 ResultInt = RInt - LInt; // Opposite order!
338 }
339 } else {
340 ResultSym = SymMgr.acquire<SymSymExpr>(args&: LSym, args&: Op, args&: RSym, args&: SymTy);
341 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
342 ResultOp = BO_Add;
343 // Bring back the cosmetic difference.
344 if (ResultInt < 0) {
345 ResultInt = -ResultInt;
346 ResultOp = BO_Sub;
347 } else if (ResultInt == 0) {
348 // Shortcut: Simplify "$x + 0" to "$x".
349 return nonloc::SymbolVal(ResultSym);
350 }
351 }
352 APSIntPtr PersistentResultInt = BV.getValue(X: ResultInt);
353 return nonloc::SymbolVal(SymMgr.acquire<SymIntExpr>(
354 args&: ResultSym, args&: ResultOp, args&: PersistentResultInt, args&: ResultTy));
355}
356
357// Rearrange if symbol type matches the result type and if the operator is a
358// comparison operator, both symbol and constant must be within constant
359// overflow bounds.
360static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op,
361 SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
362 return Sym->getType() == Ty &&
363 (!BinaryOperator::isComparisonOp(Opc: Op) ||
364 (isWithinConstantOverflowBounds(Sym, State) &&
365 isWithinConstantOverflowBounds(I: Int)));
366}
367
368static std::optional<NonLoc> tryRearrange(ProgramStateRef State,
369 BinaryOperator::Opcode Op, NonLoc Lhs,
370 NonLoc Rhs, QualType ResultTy) {
371 ProgramStateManager &StateMgr = State->getStateManager();
372 SValBuilder &SVB = StateMgr.getSValBuilder();
373
374 // We expect everything to be of the same type - this type.
375 QualType SingleTy;
376
377 // FIXME: After putting complexity threshold to the symbols we can always
378 // rearrange additive operations but rearrange comparisons only if
379 // option is set.
380 if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
381 return std::nullopt;
382
383 SymbolRef LSym = Lhs.getAsSymbol();
384 if (!LSym)
385 return std::nullopt;
386
387 if (BinaryOperator::isComparisonOp(Opc: Op)) {
388 SingleTy = LSym->getType();
389 if (ResultTy != SVB.getConditionType())
390 return std::nullopt;
391 // Initialize SingleTy later with a symbol's type.
392 } else if (BinaryOperator::isAdditiveOp(Opc: Op)) {
393 SingleTy = ResultTy;
394 if (LSym->getType() != SingleTy)
395 return std::nullopt;
396 } else {
397 // Don't rearrange other operations.
398 return std::nullopt;
399 }
400
401 assert(!SingleTy.isNull() && "We should have figured out the type by now!");
402
403 // Rearrange signed symbolic expressions only
404 if (!SingleTy->isSignedIntegerOrEnumerationType())
405 return std::nullopt;
406
407 SymbolRef RSym = Rhs.getAsSymbol();
408 if (!RSym || RSym->getType() != SingleTy)
409 return std::nullopt;
410
411 BasicValueFactory &BV = State->getBasicVals();
412 llvm::APSInt LInt, RInt;
413 std::tie(args&: LSym, args&: LInt) = decomposeSymbol(Sym: LSym, BV);
414 std::tie(args&: RSym, args&: RInt) = decomposeSymbol(Sym: RSym, BV);
415 if (!shouldRearrange(State, Op, Sym: LSym, Int: LInt, Ty: SingleTy) ||
416 !shouldRearrange(State, Op, Sym: RSym, Int: RInt, Ty: SingleTy))
417 return std::nullopt;
418
419 // We know that no overflows can occur anymore.
420 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
421}
422
423SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
424 BinaryOperator::Opcode op,
425 NonLoc lhs, NonLoc rhs,
426 QualType resultTy) {
427 NonLoc InputLHS = lhs;
428 NonLoc InputRHS = rhs;
429
430 // Constraints may have changed since the creation of a bound SVal. Check if
431 // the values can be simplified based on those new constraints.
432 SVal simplifiedLhs = simplifySVal(State: state, V: lhs);
433 SVal simplifiedRhs = simplifySVal(State: state, V: rhs);
434 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
435 lhs = *simplifiedLhsAsNonLoc;
436 if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
437 rhs = *simplifiedRhsAsNonLoc;
438
439 // Handle trivial case where left-side and right-side are the same.
440 if (lhs == rhs)
441 switch (op) {
442 default:
443 break;
444 case BO_EQ:
445 case BO_LE:
446 case BO_GE:
447 return makeTruthVal(true, resultTy);
448 case BO_LT:
449 case BO_GT:
450 case BO_NE:
451 return makeTruthVal(false, resultTy);
452 case BO_Xor:
453 case BO_Sub:
454 if (resultTy->isIntegralOrEnumerationType())
455 return makeIntVal(0, resultTy);
456 return evalCast(V: makeIntVal(0, /*isUnsigned=*/false), CastTy: resultTy,
457 OriginalTy: QualType{});
458 case BO_Or:
459 case BO_And:
460 return evalCast(lhs, resultTy, QualType{});
461 }
462
463 while (true) {
464 switch (lhs.getKind()) {
465 default:
466 return makeSymExprValNN(op, lhs, rhs, resultTy);
467 case nonloc::PointerToMemberKind: {
468 assert(rhs.getKind() == nonloc::PointerToMemberKind &&
469 "Both SVals should have pointer-to-member-type");
470 auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
471 RPTM = rhs.castAs<nonloc::PointerToMember>();
472 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
473 switch (op) {
474 case BO_EQ:
475 return makeTruthVal(LPTMD == RPTMD, resultTy);
476 case BO_NE:
477 return makeTruthVal(LPTMD != RPTMD, resultTy);
478 default:
479 return UnknownVal();
480 }
481 }
482 case nonloc::LocAsIntegerKind: {
483 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
484 switch (rhs.getKind()) {
485 case nonloc::LocAsIntegerKind:
486 // FIXME: at the moment the implementation
487 // of modeling "pointers as integers" is not complete.
488 if (!BinaryOperator::isComparisonOp(Opc: op))
489 return UnknownVal();
490 return evalBinOpLL(state, op, lhs: lhsL,
491 rhs: rhs.castAs<nonloc::LocAsInteger>().getLoc(),
492 resultTy);
493 case nonloc::ConcreteIntKind: {
494 // FIXME: at the moment the implementation
495 // of modeling "pointers as integers" is not complete.
496 if (!BinaryOperator::isComparisonOp(Opc: op))
497 return UnknownVal();
498 // Transform the integer into a location and compare.
499 // FIXME: This only makes sense for comparisons. If we want to, say,
500 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
501 // then pack it back into a LocAsInteger.
502 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
503 // If the region has a symbolic base, pay attention to the type; it
504 // might be coming from a non-default address space. For non-symbolic
505 // regions it doesn't matter that much because such comparisons would
506 // most likely evaluate to concrete false anyway. FIXME: We might
507 // still need to handle the non-comparison case.
508 if (SymbolRef lSym = lhs.getAsLocSymbol(IncludeBaseRegions: true))
509 BasicVals.getAPSIntType(T: lSym->getType()).apply(Value&: i);
510 else
511 BasicVals.getAPSIntType(T: Context.VoidPtrTy).apply(i);
512 return evalBinOpLL(state, op, lhs: lhsL, rhs: makeLoc(i), resultTy);
513 }
514 default:
515 switch (op) {
516 case BO_EQ:
517 return makeTruthVal(false, resultTy);
518 case BO_NE:
519 return makeTruthVal(true, resultTy);
520 default:
521 // This case also handles pointer arithmetic.
522 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
523 }
524 }
525 }
526 case nonloc::ConcreteIntKind: {
527 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
528
529 // If we're dealing with two known constants, just perform the operation.
530 if (const llvm::APSInt *KnownRHSValue = getConstValue(state, V: rhs)) {
531 llvm::APSInt RHSValue = *KnownRHSValue;
532 if (BinaryOperator::isComparisonOp(Opc: op)) {
533 // We're looking for a type big enough to compare the two values.
534 // FIXME: This is not correct. char + short will result in a promotion
535 // to int. Unfortunately we have lost types by this point.
536 APSIntType CompareType = std::max(a: APSIntType(LHSValue),
537 b: APSIntType(RHSValue));
538 CompareType.apply(Value&: LHSValue);
539 CompareType.apply(Value&: RHSValue);
540 } else if (!BinaryOperator::isShiftOp(Opc: op)) {
541 APSIntType IntType = BasicVals.getAPSIntType(T: resultTy);
542 IntType.apply(Value&: LHSValue);
543 IntType.apply(Value&: RHSValue);
544 }
545
546 std::optional<APSIntPtr> Result =
547 BasicVals.evalAPSInt(Op: op, V1: LHSValue, V2: RHSValue);
548 if (!Result) {
549 if (op == BO_Shl || op == BO_Shr) {
550 // FIXME: At this point the constant folding claims that the result
551 // of a bitwise shift is undefined. However, constant folding
552 // relies on the inaccurate type information that is stored in the
553 // bit size of APSInt objects, and if we reached this point, then
554 // the checker core.BitwiseShift already determined that the shift
555 // is valid (in a PreStmt callback, by querying the real type from
556 // the AST node).
557 // To avoid embarrassing false positives, let's just say that we
558 // don't know anything about the result of the shift.
559 return UnknownVal();
560 }
561 return UndefinedVal();
562 }
563
564 return nonloc::ConcreteInt(*Result);
565 }
566
567 // Swap the left and right sides and flip the operator if doing so
568 // allows us to better reason about the expression (this is a form
569 // of expression canonicalization).
570 // While we're at it, catch some special cases for non-commutative ops.
571 switch (op) {
572 case BO_LT:
573 case BO_GT:
574 case BO_LE:
575 case BO_GE:
576 op = BinaryOperator::reverseComparisonOp(Opc: op);
577 [[fallthrough]];
578 case BO_EQ:
579 case BO_NE:
580 case BO_Add:
581 case BO_Mul:
582 case BO_And:
583 case BO_Xor:
584 case BO_Or:
585 std::swap(a&: lhs, b&: rhs);
586 continue;
587 case BO_Shr:
588 // (~0)>>a
589 if (LHSValue.isAllOnes() && LHSValue.isSigned())
590 return evalCast(lhs, resultTy, QualType{});
591 [[fallthrough]];
592 case BO_Shl:
593 // 0<<a and 0>>a
594 if (LHSValue == 0)
595 return evalCast(lhs, resultTy, QualType{});
596 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
597 case BO_Div:
598 // 0 / x == 0
599 case BO_Rem:
600 // 0 % x == 0
601 if (LHSValue == 0)
602 return makeZeroVal(resultTy);
603 [[fallthrough]];
604 default:
605 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
606 }
607 }
608 case nonloc::SymbolValKind: {
609 // We only handle LHS as simple symbols or SymIntExprs.
610 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
611
612 // LHS is a symbolic expression.
613 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Val: Sym)) {
614
615 // Is this a logical not? (!x is represented as x == 0.)
616 if (op == BO_EQ && rhs.isZeroConstant()) {
617 // We know how to negate certain expressions. Simplify them here.
618
619 BinaryOperator::Opcode opc = symIntExpr->getOpcode();
620 switch (opc) {
621 default:
622 // We don't know how to negate this operation.
623 // Just handle it as if it were a normal comparison to 0.
624 break;
625 case BO_LAnd:
626 case BO_LOr:
627 llvm_unreachable("Logical operators handled by branching logic.");
628 case BO_Assign:
629 case BO_MulAssign:
630 case BO_DivAssign:
631 case BO_RemAssign:
632 case BO_AddAssign:
633 case BO_SubAssign:
634 case BO_ShlAssign:
635 case BO_ShrAssign:
636 case BO_AndAssign:
637 case BO_XorAssign:
638 case BO_OrAssign:
639 case BO_Comma:
640 llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
641 case BO_PtrMemD:
642 case BO_PtrMemI:
643 llvm_unreachable("Pointer arithmetic not handled here.");
644 case BO_LT:
645 case BO_GT:
646 case BO_LE:
647 case BO_GE:
648 case BO_EQ:
649 case BO_NE:
650 assert(resultTy->isBooleanType() ||
651 resultTy == getConditionType());
652 assert(symIntExpr->getType()->isBooleanType() ||
653 getContext().hasSameUnqualifiedType(symIntExpr->getType(),
654 getConditionType()));
655 // Negate the comparison and make a value.
656 opc = BinaryOperator::negateComparisonOp(Opc: opc);
657 return makeNonLoc(symIntExpr->getLHS(), opc,
658 symIntExpr->getRHS(), resultTy);
659 }
660 }
661
662 // For now, only handle expressions whose RHS is a constant.
663 if (const llvm::APSInt *RHSValue = getConstValue(state, V: rhs)) {
664 // If both the LHS and the current expression are additive,
665 // fold their constants and try again.
666 if (BinaryOperator::isAdditiveOp(Opc: op)) {
667 BinaryOperator::Opcode lop = symIntExpr->getOpcode();
668 if (BinaryOperator::isAdditiveOp(Opc: lop)) {
669 // Convert the two constants to a common type, then combine them.
670
671 // resultTy may not be the best type to convert to, but it's
672 // probably the best choice in expressions with mixed type
673 // (such as x+1U+2LL). The rules for implicit conversions should
674 // choose a reasonable type to preserve the expression, and will
675 // at least match how the value is going to be used.
676 APSIntType IntType = BasicVals.getAPSIntType(T: resultTy);
677 const llvm::APSInt &first = IntType.convert(Value: symIntExpr->getRHS());
678 const llvm::APSInt &second = IntType.convert(Value: *RHSValue);
679
680 // If the op and lop agrees, then we just need to
681 // sum the constants. Otherwise, we change to operation
682 // type if substraction would produce negative value
683 // (and cause overflow for unsigned integers),
684 // as consequence x+1U-10 produces x-9U, instead
685 // of x+4294967287U, that would be produced without this
686 // additional check.
687 std::optional<APSIntPtr> newRHS;
688 if (lop == op) {
689 newRHS = BasicVals.evalAPSInt(Op: BO_Add, V1: first, V2: second);
690 } else if (first >= second) {
691 newRHS = BasicVals.evalAPSInt(Op: BO_Sub, V1: first, V2: second);
692 op = lop;
693 } else {
694 newRHS = BasicVals.evalAPSInt(Op: BO_Sub, V1: second, V2: first);
695 }
696
697 assert(newRHS && "Invalid operation despite common type!");
698 rhs = nonloc::ConcreteInt(*newRHS);
699 lhs = nonloc::SymbolVal(symIntExpr->getLHS());
700 continue;
701 }
702 }
703
704 // Otherwise, make a SymIntExpr out of the expression.
705 return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
706 }
707 }
708
709 // Is the RHS a constant?
710 if (const llvm::APSInt *RHSValue = getConstValue(state, V: rhs))
711 return MakeSymIntVal(LHS: Sym, op, RHS: *RHSValue, resultTy);
712
713 if (std::optional<NonLoc> V = tryRearrange(State: state, Op: op, Lhs: lhs, Rhs: rhs, ResultTy: resultTy))
714 return *V;
715
716 // Give up -- this is not a symbolic expression we can handle.
717 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
718 }
719 }
720 }
721}
722
723static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR,
724 const FieldRegion *RightFR,
725 BinaryOperator::Opcode op,
726 QualType resultTy,
727 SimpleSValBuilder &SVB) {
728 // Only comparisons are meaningful here!
729 if (!BinaryOperator::isComparisonOp(Opc: op))
730 return UnknownVal();
731
732 // Next, see if the two FRs have the same super-region.
733 // FIXME: This doesn't handle casts yet, and simply stripping the casts
734 // doesn't help.
735 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
736 return UnknownVal();
737
738 const FieldDecl *LeftFD = LeftFR->getDecl();
739 const FieldDecl *RightFD = RightFR->getDecl();
740 const RecordDecl *RD = LeftFD->getParent();
741
742 // Make sure the two FRs are from the same kind of record. Just in case!
743 // FIXME: This is probably where inheritance would be a problem.
744 if (RD != RightFD->getParent())
745 return UnknownVal();
746
747 // We know for sure that the two fields are not the same, since that
748 // would have given us the same SVal.
749 if (op == BO_EQ)
750 return SVB.makeTruthVal(false, resultTy);
751 if (op == BO_NE)
752 return SVB.makeTruthVal(true, resultTy);
753
754 // Iterate through the fields and see which one comes first.
755 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
756 // members and the units in which bit-fields reside have addresses that
757 // increase in the order in which they are declared."
758 bool leftFirst = (op == BO_LT || op == BO_LE);
759 for (const auto *I : RD->fields()) {
760 if (I == LeftFD)
761 return SVB.makeTruthVal(leftFirst, resultTy);
762 if (I == RightFD)
763 return SVB.makeTruthVal(!leftFirst, resultTy);
764 }
765
766 llvm_unreachable("Fields not found in parent record's definition");
767}
768
769// This is used in debug builds only for now because some downstream users
770// may hit this assert in their subsequent merges.
771// There are still places in the analyzer where equal bitwidth Locs
772// are compared, and need to be found and corrected. Recent previous fixes have
773// addressed the known problems of making NULLs with specific bitwidths
774// for Loc comparisons along with deprecation of APIs for the same purpose.
775//
776static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc,
777 Loc LhsLoc) {
778 // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
779 ASTContext &Ctx = State->getStateManager().getContext();
780 uint64_t RhsBitwidth =
781 RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(T: RhsLoc.getType(Ctx));
782 uint64_t LhsBitwidth =
783 LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(T: LhsLoc.getType(Ctx));
784 if (RhsBitwidth && LhsBitwidth && (LhsLoc.getKind() == RhsLoc.getKind())) {
785 assert(RhsBitwidth == LhsBitwidth &&
786 "RhsLoc and LhsLoc bitwidth must be same!");
787 }
788}
789
790// FIXME: all this logic will change if/when we have MemRegion::getLocation().
791SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
792 BinaryOperator::Opcode op,
793 Loc lhs, Loc rhs,
794 QualType resultTy) {
795
796 // Assert that bitwidth of lhs and rhs are the same.
797 // This can happen if two different address spaces are used,
798 // and the bitwidths of the address spaces are different.
799 // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
800 // FIXME: See comment above in the function assertEqualBitWidths
801 assertEqualBitWidths(State: state, RhsLoc: rhs, LhsLoc: lhs);
802
803 // Only comparisons and subtractions are valid operations on two pointers.
804 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
805 // However, if a pointer is casted to an integer, evalBinOpNN may end up
806 // calling this function with another operation (PR7527). We don't attempt to
807 // model this for now, but it could be useful, particularly when the
808 // "location" is actually an integer value that's been passed through a void*.
809 if (!(BinaryOperator::isComparisonOp(Opc: op) || op == BO_Sub))
810 return UnknownVal();
811
812 // Special cases for when both sides are identical.
813 if (lhs == rhs) {
814 switch (op) {
815 default:
816 llvm_unreachable("Unimplemented operation for two identical values");
817 case BO_Sub:
818 return makeZeroVal(resultTy);
819 case BO_EQ:
820 case BO_LE:
821 case BO_GE:
822 return makeTruthVal(true, resultTy);
823 case BO_NE:
824 case BO_LT:
825 case BO_GT:
826 return makeTruthVal(false, resultTy);
827 }
828 }
829
830 switch (lhs.getKind()) {
831 default:
832 llvm_unreachable("Ordering not implemented for this Loc.");
833
834 case loc::GotoLabelKind:
835 // The only thing we know about labels is that they're non-null.
836 if (rhs.isZeroConstant()) {
837 switch (op) {
838 default:
839 break;
840 case BO_Sub:
841 return evalCast(lhs, resultTy, QualType{});
842 case BO_EQ:
843 case BO_LE:
844 case BO_LT:
845 return makeTruthVal(false, resultTy);
846 case BO_NE:
847 case BO_GT:
848 case BO_GE:
849 return makeTruthVal(true, resultTy);
850 }
851 }
852 // There may be two labels for the same location, and a function region may
853 // have the same address as a label at the start of the function (depending
854 // on the ABI).
855 // FIXME: we can probably do a comparison against other MemRegions, though.
856 // FIXME: is there a way to tell if two labels refer to the same location?
857 return UnknownVal();
858
859 case loc::ConcreteIntKind: {
860 auto L = lhs.castAs<loc::ConcreteInt>();
861
862 // If one of the operands is a symbol and the other is a constant,
863 // build an expression for use by the constraint manager.
864 if (SymbolRef rSym = rhs.getAsLocSymbol()) {
865 if (op == BO_Cmp)
866 return UnknownVal();
867
868 if (!BinaryOperator::isComparisonOp(Opc: op))
869 return makeNonLoc(L.getValue(), op, rSym, resultTy);
870
871 op = BinaryOperator::reverseComparisonOp(Opc: op);
872 return makeNonLoc(rSym, op, L.getValue(), resultTy);
873 }
874
875 // If both operands are constants, just perform the operation.
876 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
877 assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub);
878
879 if (std::optional<APSIntPtr> ResultInt =
880 BasicVals.evalAPSInt(Op: op, V1: L.getValue(), V2: rInt->getValue()))
881 return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{});
882 return UnknownVal();
883 }
884
885 // Special case comparisons against NULL.
886 // This must come after the test if the RHS is a symbol, which is used to
887 // build constraints. The address of any non-symbolic region is guaranteed
888 // to be non-NULL, as is any label.
889 assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs)));
890 if (lhs.isZeroConstant()) {
891 switch (op) {
892 default:
893 break;
894 case BO_EQ:
895 case BO_GT:
896 case BO_GE:
897 return makeTruthVal(false, resultTy);
898 case BO_NE:
899 case BO_LT:
900 case BO_LE:
901 return makeTruthVal(true, resultTy);
902 }
903 }
904
905 // Comparing an arbitrary integer to a region or label address is
906 // completely unknowable.
907 return UnknownVal();
908 }
909 case loc::MemRegionValKind: {
910 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
911 // If one of the operands is a symbol and the other is a constant,
912 // build an expression for use by the constraint manager.
913 if (SymbolRef lSym = lhs.getAsLocSymbol(IncludeBaseRegions: true)) {
914 if (BinaryOperator::isComparisonOp(Opc: op))
915 return MakeSymIntVal(LHS: lSym, op, RHS: rInt->getValue(), resultTy);
916 return UnknownVal();
917 }
918 // Special case comparisons to NULL.
919 // This must come after the test if the LHS is a symbol, which is used to
920 // build constraints. The address of any non-symbolic region is guaranteed
921 // to be non-NULL.
922 if (rInt->isZeroConstant()) {
923 if (op == BO_Sub)
924 return evalCast(lhs, resultTy, QualType{});
925
926 if (BinaryOperator::isComparisonOp(Opc: op)) {
927 QualType boolType = getContext().BoolTy;
928 NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
929 NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
930 return evalBinOpNN(state, op, lhs: l, rhs: r, resultTy);
931 }
932 }
933
934 // Comparing a region to an arbitrary integer is completely unknowable.
935 return UnknownVal();
936 }
937
938 // Get both values as regions, if possible.
939 const MemRegion *LeftMR = lhs.getAsRegion();
940 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
941
942 const MemRegion *RightMR = rhs.getAsRegion();
943 if (!RightMR)
944 // The RHS is probably a label, which in theory could address a region.
945 // FIXME: we can probably make a more useful statement about non-code
946 // regions, though.
947 return UnknownVal();
948
949 const MemRegion *LeftBase = LeftMR->getBaseRegion();
950 const MemRegion *RightBase = RightMR->getBaseRegion();
951 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace(State: state);
952 const MemSpaceRegion *RightMS = RightBase->getMemorySpace(State: state);
953 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
954
955 // If the two regions are from different known memory spaces they cannot be
956 // equal. Also, assume that no symbolic region (whose memory space is
957 // unknown) is on the stack.
958 if (LeftMS != RightMS &&
959 ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
960 (isa<StackSpaceRegion>(Val: LeftMS) || isa<StackSpaceRegion>(Val: RightMS)))) {
961 switch (op) {
962 default:
963 return UnknownVal();
964 case BO_EQ:
965 return makeTruthVal(false, resultTy);
966 case BO_NE:
967 return makeTruthVal(true, resultTy);
968 }
969 }
970
971 // If both values wrap regions, see if they're from different base regions.
972 // Note, heap base symbolic regions are assumed to not alias with
973 // each other; for example, we assume that malloc returns different address
974 // on each invocation.
975 // FIXME: ObjC object pointers always reside on the heap, but currently
976 // we treat their memory space as unknown, because symbolic pointers
977 // to ObjC objects may alias. There should be a way to construct
978 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
979 // guesses memory space for ObjC object pointers manually instead of
980 // relying on us.
981 if (LeftBase != RightBase &&
982 ((!isa<SymbolicRegion>(Val: LeftBase) && !isa<SymbolicRegion>(Val: RightBase)) ||
983 (isa<HeapSpaceRegion>(Val: LeftMS) || isa<HeapSpaceRegion>(Val: RightMS))) ){
984 switch (op) {
985 default:
986 return UnknownVal();
987 case BO_EQ:
988 return makeTruthVal(false, resultTy);
989 case BO_NE:
990 return makeTruthVal(true, resultTy);
991 }
992 }
993
994 // Handle special cases for when both regions are element regions.
995 const ElementRegion *RightER = dyn_cast<ElementRegion>(Val: RightMR);
996 const ElementRegion *LeftER = dyn_cast<ElementRegion>(Val: LeftMR);
997 if (RightER && LeftER) {
998 // Next, see if the two ERs have the same super-region and matching types.
999 // FIXME: This should do something useful even if the types don't match,
1000 // though if both indexes are constant the RegionRawOffset path will
1001 // give the correct answer.
1002 if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
1003 LeftER->getElementType() == RightER->getElementType()) {
1004 // Get the left index and cast it to the correct type.
1005 // If the index is unknown or undefined, bail out here.
1006 SVal LeftIndexVal = LeftER->getIndex();
1007 std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
1008 if (!LeftIndex)
1009 return UnknownVal();
1010 LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
1011 LeftIndex = LeftIndexVal.getAs<NonLoc>();
1012 if (!LeftIndex)
1013 return UnknownVal();
1014
1015 // Do the same for the right index.
1016 SVal RightIndexVal = RightER->getIndex();
1017 std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
1018 if (!RightIndex)
1019 return UnknownVal();
1020 RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
1021 RightIndex = RightIndexVal.getAs<NonLoc>();
1022 if (!RightIndex)
1023 return UnknownVal();
1024
1025 // Actually perform the operation.
1026 // evalBinOpNN expects the two indexes to already be the right type.
1027 return evalBinOpNN(state, op, lhs: *LeftIndex, rhs: *RightIndex, resultTy);
1028 }
1029 }
1030
1031 // Special handling of the FieldRegions, even with symbolic offsets.
1032 const FieldRegion *RightFR = dyn_cast<FieldRegion>(Val: RightMR);
1033 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(Val: LeftMR);
1034 if (RightFR && LeftFR) {
1035 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1036 SVB&: *this);
1037 if (!R.isUnknown())
1038 return R;
1039 }
1040
1041 // Compare the regions using the raw offsets.
1042 RegionOffset LeftOffset = LeftMR->getAsOffset();
1043 RegionOffset RightOffset = RightMR->getAsOffset();
1044
1045 if (LeftOffset.getRegion() != nullptr &&
1046 LeftOffset.getRegion() == RightOffset.getRegion() &&
1047 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1048 int64_t left = LeftOffset.getOffset();
1049 int64_t right = RightOffset.getOffset();
1050
1051 switch (op) {
1052 default:
1053 return UnknownVal();
1054 case BO_LT:
1055 return makeTruthVal(left < right, resultTy);
1056 case BO_GT:
1057 return makeTruthVal(left > right, resultTy);
1058 case BO_LE:
1059 return makeTruthVal(left <= right, resultTy);
1060 case BO_GE:
1061 return makeTruthVal(left >= right, resultTy);
1062 case BO_EQ:
1063 return makeTruthVal(left == right, resultTy);
1064 case BO_NE:
1065 return makeTruthVal(left != right, resultTy);
1066 }
1067 }
1068
1069 // At this point we're not going to get a good answer, but we can try
1070 // conjuring an expression instead.
1071 SymbolRef LHSSym = lhs.getAsLocSymbol();
1072 SymbolRef RHSSym = rhs.getAsLocSymbol();
1073 if (LHSSym && RHSSym)
1074 return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1075
1076 // If we get here, we have no way of comparing the regions.
1077 return UnknownVal();
1078 }
1079 }
1080}
1081
1082SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1083 BinaryOperator::Opcode op, Loc lhs,
1084 NonLoc rhs, QualType resultTy) {
1085 if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1086 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1087 if (PTMSV->isNullMemberPointer())
1088 return UndefinedVal();
1089
1090 auto getFieldLValue = [&](const auto *FD) -> SVal {
1091 SVal Result = lhs;
1092
1093 for (const auto &I : *PTMSV)
1094 Result = StateMgr.getStoreManager().evalDerivedToBase(
1095 Derived: Result, DerivedPtrType: I->getType(), IsVirtual: I->isVirtual());
1096
1097 return state->getLValue(FD, Result);
1098 };
1099
1100 if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1101 return getFieldLValue(FD);
1102 }
1103 if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1104 return getFieldLValue(FD);
1105 }
1106 }
1107
1108 return rhs;
1109 }
1110
1111 assert(!BinaryOperator::isComparisonOp(op) &&
1112 "arguments to comparison ops must be of the same type");
1113
1114 // Special case: rhs is a zero constant.
1115 if (rhs.isZeroConstant())
1116 return lhs;
1117
1118 // Perserve the null pointer so that it can be found by the DerefChecker.
1119 if (lhs.isZeroConstant())
1120 return lhs;
1121
1122 // We are dealing with pointer arithmetic.
1123
1124 // Handle pointer arithmetic on constant values.
1125 if (std::optional<nonloc::ConcreteInt> rhsInt =
1126 rhs.getAs<nonloc::ConcreteInt>()) {
1127 if (std::optional<loc::ConcreteInt> lhsInt =
1128 lhs.getAs<loc::ConcreteInt>()) {
1129 const llvm::APSInt &leftI = lhsInt->getValue();
1130 assert(leftI.isUnsigned());
1131 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1132
1133 // Convert the bitwidth of rightI. This should deal with overflow
1134 // since we are dealing with concrete values.
1135 rightI = rightI.extOrTrunc(width: leftI.getBitWidth());
1136
1137 // Offset the increment by the pointer size.
1138 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1139 QualType pointeeType = resultTy->getPointeeType();
1140 Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1141 rightI *= Multiplicand;
1142
1143 // Compute the adjusted pointer.
1144 switch (op) {
1145 case BO_Add:
1146 rightI = leftI + rightI;
1147 break;
1148 case BO_Sub:
1149 rightI = leftI - rightI;
1150 break;
1151 default:
1152 llvm_unreachable("Invalid pointer arithmetic operation");
1153 }
1154 return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1155 }
1156 }
1157
1158 // Handle cases where 'lhs' is a region.
1159 if (const MemRegion *region = lhs.getAsRegion()) {
1160 rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1161 SVal index = UnknownVal();
1162 const SubRegion *superR = nullptr;
1163 // We need to know the type of the pointer in order to add an integer to it.
1164 // Depending on the type, different amount of bytes is added.
1165 QualType elementType;
1166
1167 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(Val: region)) {
1168 assert(op == BO_Add || op == BO_Sub);
1169 index = evalBinOpNN(state, op, lhs: elemReg->getIndex(), rhs,
1170 resultTy: getArrayIndexType());
1171 superR = cast<SubRegion>(elemReg->getSuperRegion());
1172 elementType = elemReg->getElementType();
1173 }
1174 else if (isa<SubRegion>(Val: region)) {
1175 assert(op == BO_Add || op == BO_Sub);
1176 index = (op == BO_Add) ? rhs : evalMinus(rhs);
1177 superR = cast<SubRegion>(Val: region);
1178 // TODO: Is this actually reliable? Maybe improving our MemRegion
1179 // hierarchy to provide typed regions for all non-void pointers would be
1180 // better. For instance, we cannot extend this towards LocAsInteger
1181 // operations, where result type of the expression is integer.
1182 if (resultTy->isAnyPointerType())
1183 elementType = resultTy->getPointeeType();
1184 }
1185
1186 // Represent arithmetic on void pointers as arithmetic on char pointers.
1187 // It is fine when a TypedValueRegion of char value type represents
1188 // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1189 if (elementType->isVoidType())
1190 elementType = getContext().CharTy;
1191
1192 if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1193 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, Idx: *indexV,
1194 superRegion: superR, Ctx: getContext()));
1195 }
1196 }
1197 return UnknownVal();
1198}
1199
1200const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state,
1201 SVal V) {
1202 if (const llvm::APSInt *Res = getConcreteValue(V))
1203 return Res;
1204
1205 if (SymbolRef Sym = V.getAsSymbol())
1206 return state->getConstraintManager().getSymVal(state, sym: Sym);
1207
1208 return nullptr;
1209}
1210
1211const llvm::APSInt *SimpleSValBuilder::getConcreteValue(SVal V) {
1212 if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1213 return X->getValue().get();
1214
1215 if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1216 return X->getValue().get();
1217
1218 return nullptr;
1219}
1220
1221const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1222 SVal V) {
1223 return getConstValue(state, V: simplifySVal(State: state, V));
1224}
1225
1226const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state,
1227 SVal V) {
1228 V = simplifySVal(State: state, V);
1229
1230 if (const llvm::APSInt *Res = getConcreteValue(V))
1231 return Res;
1232
1233 if (SymbolRef Sym = V.getAsSymbol())
1234 return state->getConstraintManager().getSymMinVal(state, sym: Sym);
1235
1236 return nullptr;
1237}
1238
1239const llvm::APSInt *SimpleSValBuilder::getMaxValue(ProgramStateRef state,
1240 SVal V) {
1241 V = simplifySVal(State: state, V);
1242
1243 if (const llvm::APSInt *Res = getConcreteValue(V))
1244 return Res;
1245
1246 if (SymbolRef Sym = V.getAsSymbol())
1247 return state->getConstraintManager().getSymMaxVal(state, sym: Sym);
1248
1249 return nullptr;
1250}
1251
1252SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1253 SVal SimplifiedVal = simplifySValOnce(State, V: Val);
1254 while (SimplifiedVal != Val) {
1255 Val = SimplifiedVal;
1256 SimplifiedVal = simplifySValOnce(State, V: Val);
1257 }
1258 return SimplifiedVal;
1259}
1260
1261SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1262 return simplifyUntilFixpoint(State, Val: V);
1263}
1264
1265SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1266 // For now, this function tries to constant-fold symbols inside a
1267 // nonloc::SymbolVal, and does nothing else. More simplifications should
1268 // be possible, such as constant-folding an index in an ElementRegion.
1269
1270 class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1271 ProgramStateRef State;
1272 SValBuilder &SVB;
1273
1274 // Cache results for the lifetime of the Simplifier. Results change every
1275 // time new constraints are added to the program state, which is the whole
1276 // point of simplifying, and for that very reason it's pointless to maintain
1277 // the same cache for the duration of the whole analysis.
1278 llvm::DenseMap<SymbolRef, SVal> Cached;
1279
1280 static bool isUnchanged(SymbolRef Sym, SVal Val) {
1281 return Sym == Val.getAsSymbol();
1282 }
1283
1284 SVal cache(SymbolRef Sym, SVal V) {
1285 Cached[Sym] = V;
1286 return V;
1287 }
1288
1289 SVal skip(SymbolRef Sym) {
1290 return cache(Sym, V: SVB.makeSymbolVal(Sym));
1291 }
1292
1293 // Return the known const value for the Sym if available, or return Undef
1294 // otherwise.
1295 SVal getConst(SymbolRef Sym) {
1296 const llvm::APSInt *Const =
1297 State->getConstraintManager().getSymVal(state: State, sym: Sym);
1298 if (Const)
1299 return Loc::isLocType(T: Sym->getType()) ? (SVal)SVB.makeIntLocVal(integer: *Const)
1300 : (SVal)SVB.makeIntVal(integer: *Const);
1301 return UndefinedVal();
1302 }
1303
1304 SVal getConstOrVisit(SymbolRef Sym) {
1305 const SVal Ret = getConst(Sym);
1306 if (Ret.isUndef())
1307 return Visit(S: Sym);
1308 return Ret;
1309 }
1310
1311 public:
1312 Simplifier(ProgramStateRef State)
1313 : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1314
1315 SVal VisitSymbolData(const SymbolData *S) {
1316 // No cache here.
1317 if (const llvm::APSInt *I =
1318 State->getConstraintManager().getSymVal(state: State, sym: S))
1319 return Loc::isLocType(T: S->getType()) ? (SVal)SVB.makeIntLocVal(integer: *I)
1320 : (SVal)SVB.makeIntVal(integer: *I);
1321 return SVB.makeSymbolVal(Sym: S);
1322 }
1323
1324 SVal VisitSymIntExpr(const SymIntExpr *S) {
1325 auto I = Cached.find(S);
1326 if (I != Cached.end())
1327 return I->second;
1328
1329 SVal LHS = getConstOrVisit(Sym: S->getLHS());
1330 if (isUnchanged(Sym: S->getLHS(), Val: LHS))
1331 return skip(S);
1332
1333 SVal RHS;
1334 // By looking at the APSInt in the right-hand side of S, we cannot
1335 // figure out if it should be treated as a Loc or as a NonLoc.
1336 // So make our guess by recalling that we cannot multiply pointers
1337 // or compare a pointer to an integer.
1338 if (Loc::isLocType(T: S->getLHS()->getType()) &&
1339 BinaryOperator::isComparisonOp(S->getOpcode())) {
1340 // The usual conversion of $sym to &SymRegion{$sym}, as they have
1341 // the same meaning for Loc-type symbols, but the latter form
1342 // is preferred in SVal computations for being Loc itself.
1343 if (SymbolRef Sym = LHS.getAsSymbol()) {
1344 assert(Loc::isLocType(Sym->getType()));
1345 LHS = SVB.makeLoc(sym: Sym);
1346 }
1347 RHS = SVB.makeIntLocVal(integer: S->getRHS());
1348 } else {
1349 RHS = SVB.makeIntVal(integer: S->getRHS());
1350 }
1351
1352 return cache(
1353 Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType()));
1354 }
1355
1356 SVal VisitIntSymExpr(const IntSymExpr *S) {
1357 auto I = Cached.find(S);
1358 if (I != Cached.end())
1359 return I->second;
1360
1361 SVal RHS = getConstOrVisit(Sym: S->getRHS());
1362 if (isUnchanged(Sym: S->getRHS(), Val: RHS))
1363 return skip(S);
1364
1365 SVal LHS = SVB.makeIntVal(integer: S->getLHS());
1366 return cache(
1367 Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType()));
1368 }
1369
1370 SVal VisitSymSymExpr(const SymSymExpr *S) {
1371 auto I = Cached.find(S);
1372 if (I != Cached.end())
1373 return I->second;
1374
1375 // For now don't try to simplify mixed Loc/NonLoc expressions
1376 // because they often appear from LocAsInteger operations
1377 // and we don't know how to combine a LocAsInteger
1378 // with a concrete value.
1379 if (Loc::isLocType(T: S->getLHS()->getType()) !=
1380 Loc::isLocType(T: S->getRHS()->getType()))
1381 return skip(S);
1382
1383 SVal LHS = getConstOrVisit(Sym: S->getLHS());
1384 SVal RHS = getConstOrVisit(Sym: S->getRHS());
1385
1386 if (isUnchanged(Sym: S->getLHS(), Val: LHS) && isUnchanged(Sym: S->getRHS(), Val: RHS))
1387 return skip(S);
1388
1389 return cache(
1390 Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType()));
1391 }
1392
1393 SVal VisitSymbolCast(const SymbolCast *S) {
1394 auto I = Cached.find(S);
1395 if (I != Cached.end())
1396 return I->second;
1397 const SymExpr *OpSym = S->getOperand();
1398 SVal OpVal = getConstOrVisit(Sym: OpSym);
1399 if (isUnchanged(Sym: OpSym, Val: OpVal))
1400 return skip(S);
1401
1402 return cache(S, SVB.evalCast(V: OpVal, CastTy: S->getType(), OriginalTy: OpSym->getType()));
1403 }
1404
1405 SVal VisitUnarySymExpr(const UnarySymExpr *S) {
1406 auto I = Cached.find(S);
1407 if (I != Cached.end())
1408 return I->second;
1409 SVal Op = getConstOrVisit(Sym: S->getOperand());
1410 if (isUnchanged(Sym: S->getOperand(), Val: Op))
1411 return skip(S);
1412
1413 return cache(
1414 S, SVB.evalUnaryOp(state: State, opc: S->getOpcode(), operand: Op, type: S->getType()));
1415 }
1416
1417 SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); }
1418
1419 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1420
1421 SVal VisitSymbolVal(nonloc::SymbolVal V) {
1422 // Simplification is much more costly than computing complexity.
1423 // For high complexity, it may be not worth it.
1424 return Visit(S: V.getSymbol());
1425 }
1426
1427 SVal VisitSVal(SVal V) { return V; }
1428 };
1429
1430 SVal SimplifiedV = Simplifier(State).Visit(V);
1431 return SimplifiedV;
1432}
1433

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp