1 | // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*- |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines SimpleSValBuilder, a basic implementation of SValBuilder. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" |
14 | #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" |
15 | #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" |
16 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" |
17 | #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" |
18 | #include <optional> |
19 | |
20 | using namespace clang; |
21 | using namespace ento; |
22 | |
23 | namespace { |
24 | class SimpleSValBuilder : public SValBuilder { |
25 | |
26 | // Query the constraint manager whether the SVal has only one possible |
27 | // (integer) value. If that is the case, the value is returned. Otherwise, |
28 | // returns NULL. |
29 | // This is an implementation detail. Checkers should use `getKnownValue()` |
30 | // instead. |
31 | static const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V); |
32 | |
33 | // Helper function that returns the value stored in a nonloc::ConcreteInt or |
34 | // loc::ConcreteInt. |
35 | static const llvm::APSInt *getConcreteValue(SVal V); |
36 | |
37 | // With one `simplifySValOnce` call, a compound symbols might collapse to |
38 | // simpler symbol tree that is still possible to further simplify. Thus, we |
39 | // do the simplification on a new symbol tree until we reach the simplest |
40 | // form, i.e. the fixpoint. |
41 | // Consider the following symbol `(b * b) * b * b` which has this tree: |
42 | // * |
43 | // / \ |
44 | // * b |
45 | // / \ |
46 | // / b |
47 | // (b * b) |
48 | // Now, if the `b * b == 1` new constraint is added then during the first |
49 | // iteration we have the following transformations: |
50 | // * * |
51 | // / \ / \ |
52 | // * b --> b b |
53 | // / \ |
54 | // / b |
55 | // 1 |
56 | // We need another iteration to reach the final result `1`. |
57 | SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val); |
58 | |
59 | // Recursively descends into symbolic expressions and replaces symbols |
60 | // with their known values (in the sense of the getConstValue() method). |
61 | // We traverse the symbol tree and query the constraint values for the |
62 | // sub-trees and if a value is a constant we do the constant folding. |
63 | SVal simplifySValOnce(ProgramStateRef State, SVal V); |
64 | |
65 | public: |
66 | SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, |
67 | ProgramStateManager &stateMgr) |
68 | : SValBuilder(alloc, context, stateMgr) {} |
69 | ~SimpleSValBuilder() override {} |
70 | |
71 | SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, |
72 | NonLoc lhs, NonLoc rhs, QualType resultTy) override; |
73 | SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, |
74 | Loc lhs, Loc rhs, QualType resultTy) override; |
75 | SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, |
76 | Loc lhs, NonLoc rhs, QualType resultTy) override; |
77 | |
78 | /// Evaluates a given SVal by recursively evaluating and |
79 | /// simplifying the children SVals. If the SVal has only one possible |
80 | /// (integer) value, that value is returned. Otherwise, returns NULL. |
81 | const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override; |
82 | |
83 | /// Evaluates a given SVal by recursively evaluating and simplifying the |
84 | /// children SVals, then returns its minimal possible (integer) value. If the |
85 | /// constraint manager cannot provide a meaningful answer, this returns NULL. |
86 | const llvm::APSInt *getMinValue(ProgramStateRef state, SVal V) override; |
87 | |
88 | /// Evaluates a given SVal by recursively evaluating and simplifying the |
89 | /// children SVals, then returns its maximal possible (integer) value. If the |
90 | /// constraint manager cannot provide a meaningful answer, this returns NULL. |
91 | const llvm::APSInt *getMaxValue(ProgramStateRef state, SVal V) override; |
92 | |
93 | SVal simplifySVal(ProgramStateRef State, SVal V) override; |
94 | |
95 | SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op, |
96 | const llvm::APSInt &RHS, QualType resultTy); |
97 | }; |
98 | } // end anonymous namespace |
99 | |
100 | SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, |
101 | ASTContext &context, |
102 | ProgramStateManager &stateMgr) { |
103 | return new SimpleSValBuilder(alloc, context, stateMgr); |
104 | } |
105 | |
106 | // Checks if the negation the value and flipping sign preserve |
107 | // the semantics on the operation in the resultType |
108 | static bool isNegationValuePreserving(const llvm::APSInt &Value, |
109 | APSIntType ResultType) { |
110 | const unsigned ValueBits = Value.getSignificantBits(); |
111 | if (ValueBits == ResultType.getBitWidth()) { |
112 | // The value is the lowest negative value that is representable |
113 | // in signed integer with bitWith of result type. The |
114 | // negation is representable if resultType is unsigned. |
115 | return ResultType.isUnsigned(); |
116 | } |
117 | |
118 | // If resultType bitWith is higher that number of bits required |
119 | // to represent RHS, the sign flip produce same value. |
120 | return ValueBits < ResultType.getBitWidth(); |
121 | } |
122 | |
123 | //===----------------------------------------------------------------------===// |
124 | // Transfer function for binary operators. |
125 | //===----------------------------------------------------------------------===// |
126 | |
127 | SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS, |
128 | BinaryOperator::Opcode op, |
129 | const llvm::APSInt &RHS, |
130 | QualType resultTy) { |
131 | bool isIdempotent = false; |
132 | |
133 | // Check for a few special cases with known reductions first. |
134 | switch (op) { |
135 | default: |
136 | // We can't reduce this case; just treat it normally. |
137 | break; |
138 | case BO_Mul: |
139 | // a*0 and a*1 |
140 | if (RHS == 0) |
141 | return makeIntVal(0, resultTy); |
142 | else if (RHS == 1) |
143 | isIdempotent = true; |
144 | break; |
145 | case BO_Div: |
146 | // a/0 and a/1 |
147 | if (RHS == 0) |
148 | // This is also handled elsewhere. |
149 | return UndefinedVal(); |
150 | else if (RHS == 1) |
151 | isIdempotent = true; |
152 | break; |
153 | case BO_Rem: |
154 | // a%0 and a%1 |
155 | if (RHS == 0) |
156 | // This is also handled elsewhere. |
157 | return UndefinedVal(); |
158 | else if (RHS == 1) |
159 | return makeIntVal(0, resultTy); |
160 | break; |
161 | case BO_Add: |
162 | case BO_Sub: |
163 | case BO_Shl: |
164 | case BO_Shr: |
165 | case BO_Xor: |
166 | // a+0, a-0, a<<0, a>>0, a^0 |
167 | if (RHS == 0) |
168 | isIdempotent = true; |
169 | break; |
170 | case BO_And: |
171 | // a&0 and a&(~0) |
172 | if (RHS == 0) |
173 | return makeIntVal(0, resultTy); |
174 | else if (RHS.isAllOnes()) |
175 | isIdempotent = true; |
176 | break; |
177 | case BO_Or: |
178 | // a|0 and a|(~0) |
179 | if (RHS == 0) |
180 | isIdempotent = true; |
181 | else if (RHS.isAllOnes()) { |
182 | const llvm::APSInt &Result = BasicVals.Convert(T: resultTy, From: RHS); |
183 | return nonloc::ConcreteInt(Result); |
184 | } |
185 | break; |
186 | } |
187 | |
188 | // Idempotent ops (like a*1) can still change the type of an expression. |
189 | // Wrap the LHS up in a NonLoc again and let evalCast do the |
190 | // dirty work. |
191 | if (isIdempotent) |
192 | return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{}); |
193 | |
194 | // If we reach this point, the expression cannot be simplified. |
195 | // Make a SymbolVal for the entire expression, after converting the RHS. |
196 | const llvm::APSInt *ConvertedRHS = &RHS; |
197 | if (BinaryOperator::isComparisonOp(Opc: op)) { |
198 | // We're looking for a type big enough to compare the symbolic value |
199 | // with the given constant. |
200 | // FIXME: This is an approximation of Sema::UsualArithmeticConversions. |
201 | ASTContext &Ctx = getContext(); |
202 | QualType SymbolType = LHS->getType(); |
203 | uint64_t ValWidth = RHS.getBitWidth(); |
204 | uint64_t TypeWidth = Ctx.getTypeSize(T: SymbolType); |
205 | |
206 | if (ValWidth < TypeWidth) { |
207 | // If the value is too small, extend it. |
208 | ConvertedRHS = &BasicVals.Convert(T: SymbolType, From: RHS); |
209 | } else if (ValWidth == TypeWidth) { |
210 | // If the value is signed but the symbol is unsigned, do the comparison |
211 | // in unsigned space. [C99 6.3.1.8] |
212 | // (For the opposite case, the value is already unsigned.) |
213 | if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType()) |
214 | ConvertedRHS = &BasicVals.Convert(T: SymbolType, From: RHS); |
215 | } |
216 | } else if (BinaryOperator::isAdditiveOp(Opc: op) && RHS.isNegative()) { |
217 | // Change a+(-N) into a-N, and a-(-N) into a+N |
218 | // Adjust addition/subtraction of negative value, to |
219 | // subtraction/addition of the negated value. |
220 | APSIntType resultIntTy = BasicVals.getAPSIntType(T: resultTy); |
221 | if (isNegationValuePreserving(Value: RHS, ResultType: resultIntTy)) { |
222 | ConvertedRHS = &BasicVals.getValue(X: -resultIntTy.convert(Value: RHS)); |
223 | op = (op == BO_Add) ? BO_Sub : BO_Add; |
224 | } else { |
225 | ConvertedRHS = &BasicVals.Convert(T: resultTy, From: RHS); |
226 | } |
227 | } else |
228 | ConvertedRHS = &BasicVals.Convert(T: resultTy, From: RHS); |
229 | |
230 | return makeNonLoc(LHS, op, *ConvertedRHS, resultTy); |
231 | } |
232 | |
233 | // See if Sym is known to be a relation Rel with Bound. |
234 | static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym, |
235 | llvm::APSInt Bound, ProgramStateRef State) { |
236 | SValBuilder &SVB = State->getStateManager().getSValBuilder(); |
237 | SVal Result = |
238 | SVB.evalBinOpNN(state: State, op: Rel, lhs: nonloc::SymbolVal(Sym), |
239 | rhs: nonloc::ConcreteInt(Bound), resultTy: SVB.getConditionType()); |
240 | if (auto DV = Result.getAs<DefinedSVal>()) { |
241 | return !State->assume(Cond: *DV, Assumption: false); |
242 | } |
243 | return false; |
244 | } |
245 | |
246 | // See if Sym is known to be within [min/4, max/4], where min and max |
247 | // are the bounds of the symbol's integral type. With such symbols, |
248 | // some manipulations can be performed without the risk of overflow. |
249 | // assume() doesn't cause infinite recursion because we should be dealing |
250 | // with simpler symbols on every recursive call. |
251 | static bool isWithinConstantOverflowBounds(SymbolRef Sym, |
252 | ProgramStateRef State) { |
253 | SValBuilder &SVB = State->getStateManager().getSValBuilder(); |
254 | BasicValueFactory &BV = SVB.getBasicValueFactory(); |
255 | |
256 | QualType T = Sym->getType(); |
257 | assert(T->isSignedIntegerOrEnumerationType() && |
258 | "This only works with signed integers!" ); |
259 | APSIntType AT = BV.getAPSIntType(T); |
260 | |
261 | llvm::APSInt Max = AT.getMaxValue() / AT.getValue(RawValue: 4), Min = -Max; |
262 | return isInRelation(Rel: BO_LE, Sym, Bound: Max, State) && |
263 | isInRelation(Rel: BO_GE, Sym, Bound: Min, State); |
264 | } |
265 | |
266 | // Same for the concrete integers: see if I is within [min/4, max/4]. |
267 | static bool isWithinConstantOverflowBounds(llvm::APSInt I) { |
268 | APSIntType AT(I); |
269 | assert(!AT.isUnsigned() && |
270 | "This only works with signed integers!" ); |
271 | |
272 | llvm::APSInt Max = AT.getMaxValue() / AT.getValue(RawValue: 4), Min = -Max; |
273 | return (I <= Max) && (I >= -Max); |
274 | } |
275 | |
276 | static std::pair<SymbolRef, llvm::APSInt> |
277 | decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) { |
278 | if (const auto *SymInt = dyn_cast<SymIntExpr>(Val: Sym)) |
279 | if (BinaryOperator::isAdditiveOp(SymInt->getOpcode())) |
280 | return std::make_pair(SymInt->getLHS(), |
281 | (SymInt->getOpcode() == BO_Add) ? |
282 | (SymInt->getRHS()) : |
283 | (-SymInt->getRHS())); |
284 | |
285 | // Fail to decompose: "reduce" the problem to the "$x + 0" case. |
286 | return std::make_pair(x&: Sym, y: BV.getValue(X: 0, T: Sym->getType())); |
287 | } |
288 | |
289 | // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the |
290 | // same signed integral type and no overflows occur (which should be checked |
291 | // by the caller). |
292 | static NonLoc doRearrangeUnchecked(ProgramStateRef State, |
293 | BinaryOperator::Opcode Op, |
294 | SymbolRef LSym, llvm::APSInt LInt, |
295 | SymbolRef RSym, llvm::APSInt RInt) { |
296 | SValBuilder &SVB = State->getStateManager().getSValBuilder(); |
297 | BasicValueFactory &BV = SVB.getBasicValueFactory(); |
298 | SymbolManager &SymMgr = SVB.getSymbolManager(); |
299 | |
300 | QualType SymTy = LSym->getType(); |
301 | assert(SymTy == RSym->getType() && |
302 | "Symbols are not of the same type!" ); |
303 | assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) && |
304 | "Integers are not of the same type as symbols!" ); |
305 | assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) && |
306 | "Integers are not of the same type as symbols!" ); |
307 | |
308 | QualType ResultTy; |
309 | if (BinaryOperator::isComparisonOp(Opc: Op)) |
310 | ResultTy = SVB.getConditionType(); |
311 | else if (BinaryOperator::isAdditiveOp(Opc: Op)) |
312 | ResultTy = SymTy; |
313 | else |
314 | llvm_unreachable("Operation not suitable for unchecked rearrangement!" ); |
315 | |
316 | if (LSym == RSym) |
317 | return SVB.evalBinOpNN(state: State, op: Op, lhs: nonloc::ConcreteInt(LInt), |
318 | rhs: nonloc::ConcreteInt(RInt), resultTy: ResultTy) |
319 | .castAs<NonLoc>(); |
320 | |
321 | SymbolRef ResultSym = nullptr; |
322 | BinaryOperator::Opcode ResultOp; |
323 | llvm::APSInt ResultInt; |
324 | if (BinaryOperator::isComparisonOp(Opc: Op)) { |
325 | // Prefer comparing to a non-negative number. |
326 | // FIXME: Maybe it'd be better to have consistency in |
327 | // "$x - $y" vs. "$y - $x" because those are solver's keys. |
328 | if (LInt > RInt) { |
329 | ResultSym = SymMgr.getSymSymExpr(lhs: RSym, op: BO_Sub, rhs: LSym, t: SymTy); |
330 | ResultOp = BinaryOperator::reverseComparisonOp(Opc: Op); |
331 | ResultInt = LInt - RInt; // Opposite order! |
332 | } else { |
333 | ResultSym = SymMgr.getSymSymExpr(lhs: LSym, op: BO_Sub, rhs: RSym, t: SymTy); |
334 | ResultOp = Op; |
335 | ResultInt = RInt - LInt; // Opposite order! |
336 | } |
337 | } else { |
338 | ResultSym = SymMgr.getSymSymExpr(lhs: LSym, op: Op, rhs: RSym, t: SymTy); |
339 | ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt); |
340 | ResultOp = BO_Add; |
341 | // Bring back the cosmetic difference. |
342 | if (ResultInt < 0) { |
343 | ResultInt = -ResultInt; |
344 | ResultOp = BO_Sub; |
345 | } else if (ResultInt == 0) { |
346 | // Shortcut: Simplify "$x + 0" to "$x". |
347 | return nonloc::SymbolVal(ResultSym); |
348 | } |
349 | } |
350 | const llvm::APSInt &PersistentResultInt = BV.getValue(X: ResultInt); |
351 | return nonloc::SymbolVal( |
352 | SymMgr.getSymIntExpr(lhs: ResultSym, op: ResultOp, rhs: PersistentResultInt, t: ResultTy)); |
353 | } |
354 | |
355 | // Rearrange if symbol type matches the result type and if the operator is a |
356 | // comparison operator, both symbol and constant must be within constant |
357 | // overflow bounds. |
358 | static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, |
359 | SymbolRef Sym, llvm::APSInt Int, QualType Ty) { |
360 | return Sym->getType() == Ty && |
361 | (!BinaryOperator::isComparisonOp(Opc: Op) || |
362 | (isWithinConstantOverflowBounds(Sym, State) && |
363 | isWithinConstantOverflowBounds(I: Int))); |
364 | } |
365 | |
366 | static std::optional<NonLoc> tryRearrange(ProgramStateRef State, |
367 | BinaryOperator::Opcode Op, NonLoc Lhs, |
368 | NonLoc Rhs, QualType ResultTy) { |
369 | ProgramStateManager &StateMgr = State->getStateManager(); |
370 | SValBuilder &SVB = StateMgr.getSValBuilder(); |
371 | |
372 | // We expect everything to be of the same type - this type. |
373 | QualType SingleTy; |
374 | |
375 | // FIXME: After putting complexity threshold to the symbols we can always |
376 | // rearrange additive operations but rearrange comparisons only if |
377 | // option is set. |
378 | if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation) |
379 | return std::nullopt; |
380 | |
381 | SymbolRef LSym = Lhs.getAsSymbol(); |
382 | if (!LSym) |
383 | return std::nullopt; |
384 | |
385 | if (BinaryOperator::isComparisonOp(Opc: Op)) { |
386 | SingleTy = LSym->getType(); |
387 | if (ResultTy != SVB.getConditionType()) |
388 | return std::nullopt; |
389 | // Initialize SingleTy later with a symbol's type. |
390 | } else if (BinaryOperator::isAdditiveOp(Opc: Op)) { |
391 | SingleTy = ResultTy; |
392 | if (LSym->getType() != SingleTy) |
393 | return std::nullopt; |
394 | } else { |
395 | // Don't rearrange other operations. |
396 | return std::nullopt; |
397 | } |
398 | |
399 | assert(!SingleTy.isNull() && "We should have figured out the type by now!" ); |
400 | |
401 | // Rearrange signed symbolic expressions only |
402 | if (!SingleTy->isSignedIntegerOrEnumerationType()) |
403 | return std::nullopt; |
404 | |
405 | SymbolRef RSym = Rhs.getAsSymbol(); |
406 | if (!RSym || RSym->getType() != SingleTy) |
407 | return std::nullopt; |
408 | |
409 | BasicValueFactory &BV = State->getBasicVals(); |
410 | llvm::APSInt LInt, RInt; |
411 | std::tie(args&: LSym, args&: LInt) = decomposeSymbol(Sym: LSym, BV); |
412 | std::tie(args&: RSym, args&: RInt) = decomposeSymbol(Sym: RSym, BV); |
413 | if (!shouldRearrange(State, Op, Sym: LSym, Int: LInt, Ty: SingleTy) || |
414 | !shouldRearrange(State, Op, Sym: RSym, Int: RInt, Ty: SingleTy)) |
415 | return std::nullopt; |
416 | |
417 | // We know that no overflows can occur anymore. |
418 | return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt); |
419 | } |
420 | |
421 | SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, |
422 | BinaryOperator::Opcode op, |
423 | NonLoc lhs, NonLoc rhs, |
424 | QualType resultTy) { |
425 | NonLoc InputLHS = lhs; |
426 | NonLoc InputRHS = rhs; |
427 | |
428 | // Constraints may have changed since the creation of a bound SVal. Check if |
429 | // the values can be simplified based on those new constraints. |
430 | SVal simplifiedLhs = simplifySVal(State: state, V: lhs); |
431 | SVal simplifiedRhs = simplifySVal(State: state, V: rhs); |
432 | if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>()) |
433 | lhs = *simplifiedLhsAsNonLoc; |
434 | if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>()) |
435 | rhs = *simplifiedRhsAsNonLoc; |
436 | |
437 | // Handle trivial case where left-side and right-side are the same. |
438 | if (lhs == rhs) |
439 | switch (op) { |
440 | default: |
441 | break; |
442 | case BO_EQ: |
443 | case BO_LE: |
444 | case BO_GE: |
445 | return makeTruthVal(true, resultTy); |
446 | case BO_LT: |
447 | case BO_GT: |
448 | case BO_NE: |
449 | return makeTruthVal(false, resultTy); |
450 | case BO_Xor: |
451 | case BO_Sub: |
452 | if (resultTy->isIntegralOrEnumerationType()) |
453 | return makeIntVal(0, resultTy); |
454 | return evalCast(V: makeIntVal(0, /*isUnsigned=*/false), CastTy: resultTy, |
455 | OriginalTy: QualType{}); |
456 | case BO_Or: |
457 | case BO_And: |
458 | return evalCast(lhs, resultTy, QualType{}); |
459 | } |
460 | |
461 | while (true) { |
462 | switch (lhs.getKind()) { |
463 | default: |
464 | return makeSymExprValNN(op, lhs, rhs, resultTy); |
465 | case nonloc::PointerToMemberKind: { |
466 | assert(rhs.getKind() == nonloc::PointerToMemberKind && |
467 | "Both SVals should have pointer-to-member-type" ); |
468 | auto LPTM = lhs.castAs<nonloc::PointerToMember>(), |
469 | RPTM = rhs.castAs<nonloc::PointerToMember>(); |
470 | auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData(); |
471 | switch (op) { |
472 | case BO_EQ: |
473 | return makeTruthVal(LPTMD == RPTMD, resultTy); |
474 | case BO_NE: |
475 | return makeTruthVal(LPTMD != RPTMD, resultTy); |
476 | default: |
477 | return UnknownVal(); |
478 | } |
479 | } |
480 | case nonloc::LocAsIntegerKind: { |
481 | Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc(); |
482 | switch (rhs.getKind()) { |
483 | case nonloc::LocAsIntegerKind: |
484 | // FIXME: at the moment the implementation |
485 | // of modeling "pointers as integers" is not complete. |
486 | if (!BinaryOperator::isComparisonOp(Opc: op)) |
487 | return UnknownVal(); |
488 | return evalBinOpLL(state, op, lhs: lhsL, |
489 | rhs: rhs.castAs<nonloc::LocAsInteger>().getLoc(), |
490 | resultTy); |
491 | case nonloc::ConcreteIntKind: { |
492 | // FIXME: at the moment the implementation |
493 | // of modeling "pointers as integers" is not complete. |
494 | if (!BinaryOperator::isComparisonOp(Opc: op)) |
495 | return UnknownVal(); |
496 | // Transform the integer into a location and compare. |
497 | // FIXME: This only makes sense for comparisons. If we want to, say, |
498 | // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it, |
499 | // then pack it back into a LocAsInteger. |
500 | llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue(); |
501 | // If the region has a symbolic base, pay attention to the type; it |
502 | // might be coming from a non-default address space. For non-symbolic |
503 | // regions it doesn't matter that much because such comparisons would |
504 | // most likely evaluate to concrete false anyway. FIXME: We might |
505 | // still need to handle the non-comparison case. |
506 | if (SymbolRef lSym = lhs.getAsLocSymbol(IncludeBaseRegions: true)) |
507 | BasicVals.getAPSIntType(T: lSym->getType()).apply(Value&: i); |
508 | else |
509 | BasicVals.getAPSIntType(T: Context.VoidPtrTy).apply(i); |
510 | return evalBinOpLL(state, op, lhs: lhsL, rhs: makeLoc(i), resultTy); |
511 | } |
512 | default: |
513 | switch (op) { |
514 | case BO_EQ: |
515 | return makeTruthVal(false, resultTy); |
516 | case BO_NE: |
517 | return makeTruthVal(true, resultTy); |
518 | default: |
519 | // This case also handles pointer arithmetic. |
520 | return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); |
521 | } |
522 | } |
523 | } |
524 | case nonloc::ConcreteIntKind: { |
525 | llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue(); |
526 | |
527 | // If we're dealing with two known constants, just perform the operation. |
528 | if (const llvm::APSInt *KnownRHSValue = getConstValue(state, V: rhs)) { |
529 | llvm::APSInt RHSValue = *KnownRHSValue; |
530 | if (BinaryOperator::isComparisonOp(Opc: op)) { |
531 | // We're looking for a type big enough to compare the two values. |
532 | // FIXME: This is not correct. char + short will result in a promotion |
533 | // to int. Unfortunately we have lost types by this point. |
534 | APSIntType CompareType = std::max(a: APSIntType(LHSValue), |
535 | b: APSIntType(RHSValue)); |
536 | CompareType.apply(Value&: LHSValue); |
537 | CompareType.apply(Value&: RHSValue); |
538 | } else if (!BinaryOperator::isShiftOp(Opc: op)) { |
539 | APSIntType IntType = BasicVals.getAPSIntType(T: resultTy); |
540 | IntType.apply(Value&: LHSValue); |
541 | IntType.apply(Value&: RHSValue); |
542 | } |
543 | |
544 | const llvm::APSInt *Result = |
545 | BasicVals.evalAPSInt(Op: op, V1: LHSValue, V2: RHSValue); |
546 | if (!Result) { |
547 | if (op == BO_Shl || op == BO_Shr) { |
548 | // FIXME: At this point the constant folding claims that the result |
549 | // of a bitwise shift is undefined. However, constant folding |
550 | // relies on the inaccurate type information that is stored in the |
551 | // bit size of APSInt objects, and if we reached this point, then |
552 | // the checker core.BitwiseShift already determined that the shift |
553 | // is valid (in a PreStmt callback, by querying the real type from |
554 | // the AST node). |
555 | // To avoid embarrassing false positives, let's just say that we |
556 | // don't know anything about the result of the shift. |
557 | return UnknownVal(); |
558 | } |
559 | return UndefinedVal(); |
560 | } |
561 | |
562 | return nonloc::ConcreteInt(*Result); |
563 | } |
564 | |
565 | // Swap the left and right sides and flip the operator if doing so |
566 | // allows us to better reason about the expression (this is a form |
567 | // of expression canonicalization). |
568 | // While we're at it, catch some special cases for non-commutative ops. |
569 | switch (op) { |
570 | case BO_LT: |
571 | case BO_GT: |
572 | case BO_LE: |
573 | case BO_GE: |
574 | op = BinaryOperator::reverseComparisonOp(Opc: op); |
575 | [[fallthrough]]; |
576 | case BO_EQ: |
577 | case BO_NE: |
578 | case BO_Add: |
579 | case BO_Mul: |
580 | case BO_And: |
581 | case BO_Xor: |
582 | case BO_Or: |
583 | std::swap(a&: lhs, b&: rhs); |
584 | continue; |
585 | case BO_Shr: |
586 | // (~0)>>a |
587 | if (LHSValue.isAllOnes() && LHSValue.isSigned()) |
588 | return evalCast(lhs, resultTy, QualType{}); |
589 | [[fallthrough]]; |
590 | case BO_Shl: |
591 | // 0<<a and 0>>a |
592 | if (LHSValue == 0) |
593 | return evalCast(lhs, resultTy, QualType{}); |
594 | return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); |
595 | case BO_Div: |
596 | // 0 / x == 0 |
597 | case BO_Rem: |
598 | // 0 % x == 0 |
599 | if (LHSValue == 0) |
600 | return makeZeroVal(resultTy); |
601 | [[fallthrough]]; |
602 | default: |
603 | return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); |
604 | } |
605 | } |
606 | case nonloc::SymbolValKind: { |
607 | // We only handle LHS as simple symbols or SymIntExprs. |
608 | SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol(); |
609 | |
610 | // LHS is a symbolic expression. |
611 | if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Val: Sym)) { |
612 | |
613 | // Is this a logical not? (!x is represented as x == 0.) |
614 | if (op == BO_EQ && rhs.isZeroConstant()) { |
615 | // We know how to negate certain expressions. Simplify them here. |
616 | |
617 | BinaryOperator::Opcode opc = symIntExpr->getOpcode(); |
618 | switch (opc) { |
619 | default: |
620 | // We don't know how to negate this operation. |
621 | // Just handle it as if it were a normal comparison to 0. |
622 | break; |
623 | case BO_LAnd: |
624 | case BO_LOr: |
625 | llvm_unreachable("Logical operators handled by branching logic." ); |
626 | case BO_Assign: |
627 | case BO_MulAssign: |
628 | case BO_DivAssign: |
629 | case BO_RemAssign: |
630 | case BO_AddAssign: |
631 | case BO_SubAssign: |
632 | case BO_ShlAssign: |
633 | case BO_ShrAssign: |
634 | case BO_AndAssign: |
635 | case BO_XorAssign: |
636 | case BO_OrAssign: |
637 | case BO_Comma: |
638 | llvm_unreachable("'=' and ',' operators handled by ExprEngine." ); |
639 | case BO_PtrMemD: |
640 | case BO_PtrMemI: |
641 | llvm_unreachable("Pointer arithmetic not handled here." ); |
642 | case BO_LT: |
643 | case BO_GT: |
644 | case BO_LE: |
645 | case BO_GE: |
646 | case BO_EQ: |
647 | case BO_NE: |
648 | assert(resultTy->isBooleanType() || |
649 | resultTy == getConditionType()); |
650 | assert(symIntExpr->getType()->isBooleanType() || |
651 | getContext().hasSameUnqualifiedType(symIntExpr->getType(), |
652 | getConditionType())); |
653 | // Negate the comparison and make a value. |
654 | opc = BinaryOperator::negateComparisonOp(Opc: opc); |
655 | return makeNonLoc(symIntExpr->getLHS(), opc, |
656 | symIntExpr->getRHS(), resultTy); |
657 | } |
658 | } |
659 | |
660 | // For now, only handle expressions whose RHS is a constant. |
661 | if (const llvm::APSInt *RHSValue = getConstValue(state, V: rhs)) { |
662 | // If both the LHS and the current expression are additive, |
663 | // fold their constants and try again. |
664 | if (BinaryOperator::isAdditiveOp(Opc: op)) { |
665 | BinaryOperator::Opcode lop = symIntExpr->getOpcode(); |
666 | if (BinaryOperator::isAdditiveOp(Opc: lop)) { |
667 | // Convert the two constants to a common type, then combine them. |
668 | |
669 | // resultTy may not be the best type to convert to, but it's |
670 | // probably the best choice in expressions with mixed type |
671 | // (such as x+1U+2LL). The rules for implicit conversions should |
672 | // choose a reasonable type to preserve the expression, and will |
673 | // at least match how the value is going to be used. |
674 | APSIntType IntType = BasicVals.getAPSIntType(T: resultTy); |
675 | const llvm::APSInt &first = IntType.convert(Value: symIntExpr->getRHS()); |
676 | const llvm::APSInt &second = IntType.convert(Value: *RHSValue); |
677 | |
678 | // If the op and lop agrees, then we just need to |
679 | // sum the constants. Otherwise, we change to operation |
680 | // type if substraction would produce negative value |
681 | // (and cause overflow for unsigned integers), |
682 | // as consequence x+1U-10 produces x-9U, instead |
683 | // of x+4294967287U, that would be produced without this |
684 | // additional check. |
685 | const llvm::APSInt *newRHS; |
686 | if (lop == op) { |
687 | newRHS = BasicVals.evalAPSInt(Op: BO_Add, V1: first, V2: second); |
688 | } else if (first >= second) { |
689 | newRHS = BasicVals.evalAPSInt(Op: BO_Sub, V1: first, V2: second); |
690 | op = lop; |
691 | } else { |
692 | newRHS = BasicVals.evalAPSInt(Op: BO_Sub, V1: second, V2: first); |
693 | } |
694 | |
695 | assert(newRHS && "Invalid operation despite common type!" ); |
696 | rhs = nonloc::ConcreteInt(*newRHS); |
697 | lhs = nonloc::SymbolVal(symIntExpr->getLHS()); |
698 | continue; |
699 | } |
700 | } |
701 | |
702 | // Otherwise, make a SymIntExpr out of the expression. |
703 | return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy); |
704 | } |
705 | } |
706 | |
707 | // Is the RHS a constant? |
708 | if (const llvm::APSInt *RHSValue = getConstValue(state, V: rhs)) |
709 | return MakeSymIntVal(LHS: Sym, op, RHS: *RHSValue, resultTy); |
710 | |
711 | if (std::optional<NonLoc> V = tryRearrange(State: state, Op: op, Lhs: lhs, Rhs: rhs, ResultTy: resultTy)) |
712 | return *V; |
713 | |
714 | // Give up -- this is not a symbolic expression we can handle. |
715 | return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); |
716 | } |
717 | } |
718 | } |
719 | } |
720 | |
721 | static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR, |
722 | const FieldRegion *RightFR, |
723 | BinaryOperator::Opcode op, |
724 | QualType resultTy, |
725 | SimpleSValBuilder &SVB) { |
726 | // Only comparisons are meaningful here! |
727 | if (!BinaryOperator::isComparisonOp(Opc: op)) |
728 | return UnknownVal(); |
729 | |
730 | // Next, see if the two FRs have the same super-region. |
731 | // FIXME: This doesn't handle casts yet, and simply stripping the casts |
732 | // doesn't help. |
733 | if (LeftFR->getSuperRegion() != RightFR->getSuperRegion()) |
734 | return UnknownVal(); |
735 | |
736 | const FieldDecl *LeftFD = LeftFR->getDecl(); |
737 | const FieldDecl *RightFD = RightFR->getDecl(); |
738 | const RecordDecl *RD = LeftFD->getParent(); |
739 | |
740 | // Make sure the two FRs are from the same kind of record. Just in case! |
741 | // FIXME: This is probably where inheritance would be a problem. |
742 | if (RD != RightFD->getParent()) |
743 | return UnknownVal(); |
744 | |
745 | // We know for sure that the two fields are not the same, since that |
746 | // would have given us the same SVal. |
747 | if (op == BO_EQ) |
748 | return SVB.makeTruthVal(false, resultTy); |
749 | if (op == BO_NE) |
750 | return SVB.makeTruthVal(true, resultTy); |
751 | |
752 | // Iterate through the fields and see which one comes first. |
753 | // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field |
754 | // members and the units in which bit-fields reside have addresses that |
755 | // increase in the order in which they are declared." |
756 | bool leftFirst = (op == BO_LT || op == BO_LE); |
757 | for (const auto *I : RD->fields()) { |
758 | if (I == LeftFD) |
759 | return SVB.makeTruthVal(leftFirst, resultTy); |
760 | if (I == RightFD) |
761 | return SVB.makeTruthVal(!leftFirst, resultTy); |
762 | } |
763 | |
764 | llvm_unreachable("Fields not found in parent record's definition" ); |
765 | } |
766 | |
767 | // This is used in debug builds only for now because some downstream users |
768 | // may hit this assert in their subsequent merges. |
769 | // There are still places in the analyzer where equal bitwidth Locs |
770 | // are compared, and need to be found and corrected. Recent previous fixes have |
771 | // addressed the known problems of making NULLs with specific bitwidths |
772 | // for Loc comparisons along with deprecation of APIs for the same purpose. |
773 | // |
774 | static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc, |
775 | Loc LhsLoc) { |
776 | // Implements a "best effort" check for RhsLoc and LhsLoc bit widths |
777 | ASTContext &Ctx = State->getStateManager().getContext(); |
778 | uint64_t RhsBitwidth = |
779 | RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(T: RhsLoc.getType(Ctx)); |
780 | uint64_t LhsBitwidth = |
781 | LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(T: LhsLoc.getType(Ctx)); |
782 | if (RhsBitwidth && LhsBitwidth && (LhsLoc.getKind() == RhsLoc.getKind())) { |
783 | assert(RhsBitwidth == LhsBitwidth && |
784 | "RhsLoc and LhsLoc bitwidth must be same!" ); |
785 | } |
786 | } |
787 | |
788 | // FIXME: all this logic will change if/when we have MemRegion::getLocation(). |
789 | SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, |
790 | BinaryOperator::Opcode op, |
791 | Loc lhs, Loc rhs, |
792 | QualType resultTy) { |
793 | |
794 | // Assert that bitwidth of lhs and rhs are the same. |
795 | // This can happen if two different address spaces are used, |
796 | // and the bitwidths of the address spaces are different. |
797 | // See LIT case clang/test/Analysis/cstring-checker-addressspace.c |
798 | // FIXME: See comment above in the function assertEqualBitWidths |
799 | assertEqualBitWidths(State: state, RhsLoc: rhs, LhsLoc: lhs); |
800 | |
801 | // Only comparisons and subtractions are valid operations on two pointers. |
802 | // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15]. |
803 | // However, if a pointer is casted to an integer, evalBinOpNN may end up |
804 | // calling this function with another operation (PR7527). We don't attempt to |
805 | // model this for now, but it could be useful, particularly when the |
806 | // "location" is actually an integer value that's been passed through a void*. |
807 | if (!(BinaryOperator::isComparisonOp(Opc: op) || op == BO_Sub)) |
808 | return UnknownVal(); |
809 | |
810 | // Special cases for when both sides are identical. |
811 | if (lhs == rhs) { |
812 | switch (op) { |
813 | default: |
814 | llvm_unreachable("Unimplemented operation for two identical values" ); |
815 | case BO_Sub: |
816 | return makeZeroVal(resultTy); |
817 | case BO_EQ: |
818 | case BO_LE: |
819 | case BO_GE: |
820 | return makeTruthVal(true, resultTy); |
821 | case BO_NE: |
822 | case BO_LT: |
823 | case BO_GT: |
824 | return makeTruthVal(false, resultTy); |
825 | } |
826 | } |
827 | |
828 | switch (lhs.getKind()) { |
829 | default: |
830 | llvm_unreachable("Ordering not implemented for this Loc." ); |
831 | |
832 | case loc::GotoLabelKind: |
833 | // The only thing we know about labels is that they're non-null. |
834 | if (rhs.isZeroConstant()) { |
835 | switch (op) { |
836 | default: |
837 | break; |
838 | case BO_Sub: |
839 | return evalCast(lhs, resultTy, QualType{}); |
840 | case BO_EQ: |
841 | case BO_LE: |
842 | case BO_LT: |
843 | return makeTruthVal(false, resultTy); |
844 | case BO_NE: |
845 | case BO_GT: |
846 | case BO_GE: |
847 | return makeTruthVal(true, resultTy); |
848 | } |
849 | } |
850 | // There may be two labels for the same location, and a function region may |
851 | // have the same address as a label at the start of the function (depending |
852 | // on the ABI). |
853 | // FIXME: we can probably do a comparison against other MemRegions, though. |
854 | // FIXME: is there a way to tell if two labels refer to the same location? |
855 | return UnknownVal(); |
856 | |
857 | case loc::ConcreteIntKind: { |
858 | auto L = lhs.castAs<loc::ConcreteInt>(); |
859 | |
860 | // If one of the operands is a symbol and the other is a constant, |
861 | // build an expression for use by the constraint manager. |
862 | if (SymbolRef rSym = rhs.getAsLocSymbol()) { |
863 | // We can only build expressions with symbols on the left, |
864 | // so we need a reversible operator. |
865 | if (!BinaryOperator::isComparisonOp(Opc: op) || op == BO_Cmp) |
866 | return UnknownVal(); |
867 | |
868 | op = BinaryOperator::reverseComparisonOp(Opc: op); |
869 | return makeNonLoc(rSym, op, L.getValue(), resultTy); |
870 | } |
871 | |
872 | // If both operands are constants, just perform the operation. |
873 | if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { |
874 | assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub); |
875 | |
876 | if (const auto *ResultInt = |
877 | BasicVals.evalAPSInt(Op: op, V1: L.getValue(), V2: rInt->getValue())) |
878 | return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{}); |
879 | return UnknownVal(); |
880 | } |
881 | |
882 | // Special case comparisons against NULL. |
883 | // This must come after the test if the RHS is a symbol, which is used to |
884 | // build constraints. The address of any non-symbolic region is guaranteed |
885 | // to be non-NULL, as is any label. |
886 | assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs))); |
887 | if (lhs.isZeroConstant()) { |
888 | switch (op) { |
889 | default: |
890 | break; |
891 | case BO_EQ: |
892 | case BO_GT: |
893 | case BO_GE: |
894 | return makeTruthVal(false, resultTy); |
895 | case BO_NE: |
896 | case BO_LT: |
897 | case BO_LE: |
898 | return makeTruthVal(true, resultTy); |
899 | } |
900 | } |
901 | |
902 | // Comparing an arbitrary integer to a region or label address is |
903 | // completely unknowable. |
904 | return UnknownVal(); |
905 | } |
906 | case loc::MemRegionValKind: { |
907 | if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { |
908 | // If one of the operands is a symbol and the other is a constant, |
909 | // build an expression for use by the constraint manager. |
910 | if (SymbolRef lSym = lhs.getAsLocSymbol(IncludeBaseRegions: true)) { |
911 | if (BinaryOperator::isComparisonOp(Opc: op)) |
912 | return MakeSymIntVal(LHS: lSym, op, RHS: rInt->getValue(), resultTy); |
913 | return UnknownVal(); |
914 | } |
915 | // Special case comparisons to NULL. |
916 | // This must come after the test if the LHS is a symbol, which is used to |
917 | // build constraints. The address of any non-symbolic region is guaranteed |
918 | // to be non-NULL. |
919 | if (rInt->isZeroConstant()) { |
920 | if (op == BO_Sub) |
921 | return evalCast(lhs, resultTy, QualType{}); |
922 | |
923 | if (BinaryOperator::isComparisonOp(Opc: op)) { |
924 | QualType boolType = getContext().BoolTy; |
925 | NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>(); |
926 | NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>(); |
927 | return evalBinOpNN(state, op, lhs: l, rhs: r, resultTy); |
928 | } |
929 | } |
930 | |
931 | // Comparing a region to an arbitrary integer is completely unknowable. |
932 | return UnknownVal(); |
933 | } |
934 | |
935 | // Get both values as regions, if possible. |
936 | const MemRegion *LeftMR = lhs.getAsRegion(); |
937 | assert(LeftMR && "MemRegionValKind SVal doesn't have a region!" ); |
938 | |
939 | const MemRegion *RightMR = rhs.getAsRegion(); |
940 | if (!RightMR) |
941 | // The RHS is probably a label, which in theory could address a region. |
942 | // FIXME: we can probably make a more useful statement about non-code |
943 | // regions, though. |
944 | return UnknownVal(); |
945 | |
946 | const MemRegion *LeftBase = LeftMR->getBaseRegion(); |
947 | const MemRegion *RightBase = RightMR->getBaseRegion(); |
948 | const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace(); |
949 | const MemSpaceRegion *RightMS = RightBase->getMemorySpace(); |
950 | const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion(); |
951 | |
952 | // If the two regions are from different known memory spaces they cannot be |
953 | // equal. Also, assume that no symbolic region (whose memory space is |
954 | // unknown) is on the stack. |
955 | if (LeftMS != RightMS && |
956 | ((LeftMS != UnknownMS && RightMS != UnknownMS) || |
957 | (isa<StackSpaceRegion>(Val: LeftMS) || isa<StackSpaceRegion>(Val: RightMS)))) { |
958 | switch (op) { |
959 | default: |
960 | return UnknownVal(); |
961 | case BO_EQ: |
962 | return makeTruthVal(false, resultTy); |
963 | case BO_NE: |
964 | return makeTruthVal(true, resultTy); |
965 | } |
966 | } |
967 | |
968 | // If both values wrap regions, see if they're from different base regions. |
969 | // Note, heap base symbolic regions are assumed to not alias with |
970 | // each other; for example, we assume that malloc returns different address |
971 | // on each invocation. |
972 | // FIXME: ObjC object pointers always reside on the heap, but currently |
973 | // we treat their memory space as unknown, because symbolic pointers |
974 | // to ObjC objects may alias. There should be a way to construct |
975 | // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker |
976 | // guesses memory space for ObjC object pointers manually instead of |
977 | // relying on us. |
978 | if (LeftBase != RightBase && |
979 | ((!isa<SymbolicRegion>(Val: LeftBase) && !isa<SymbolicRegion>(Val: RightBase)) || |
980 | (isa<HeapSpaceRegion>(Val: LeftMS) || isa<HeapSpaceRegion>(Val: RightMS))) ){ |
981 | switch (op) { |
982 | default: |
983 | return UnknownVal(); |
984 | case BO_EQ: |
985 | return makeTruthVal(false, resultTy); |
986 | case BO_NE: |
987 | return makeTruthVal(true, resultTy); |
988 | } |
989 | } |
990 | |
991 | // Handle special cases for when both regions are element regions. |
992 | const ElementRegion *RightER = dyn_cast<ElementRegion>(Val: RightMR); |
993 | const ElementRegion *LeftER = dyn_cast<ElementRegion>(Val: LeftMR); |
994 | if (RightER && LeftER) { |
995 | // Next, see if the two ERs have the same super-region and matching types. |
996 | // FIXME: This should do something useful even if the types don't match, |
997 | // though if both indexes are constant the RegionRawOffset path will |
998 | // give the correct answer. |
999 | if (LeftER->getSuperRegion() == RightER->getSuperRegion() && |
1000 | LeftER->getElementType() == RightER->getElementType()) { |
1001 | // Get the left index and cast it to the correct type. |
1002 | // If the index is unknown or undefined, bail out here. |
1003 | SVal LeftIndexVal = LeftER->getIndex(); |
1004 | std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>(); |
1005 | if (!LeftIndex) |
1006 | return UnknownVal(); |
1007 | LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{}); |
1008 | LeftIndex = LeftIndexVal.getAs<NonLoc>(); |
1009 | if (!LeftIndex) |
1010 | return UnknownVal(); |
1011 | |
1012 | // Do the same for the right index. |
1013 | SVal RightIndexVal = RightER->getIndex(); |
1014 | std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>(); |
1015 | if (!RightIndex) |
1016 | return UnknownVal(); |
1017 | RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{}); |
1018 | RightIndex = RightIndexVal.getAs<NonLoc>(); |
1019 | if (!RightIndex) |
1020 | return UnknownVal(); |
1021 | |
1022 | // Actually perform the operation. |
1023 | // evalBinOpNN expects the two indexes to already be the right type. |
1024 | return evalBinOpNN(state, op, lhs: *LeftIndex, rhs: *RightIndex, resultTy); |
1025 | } |
1026 | } |
1027 | |
1028 | // Special handling of the FieldRegions, even with symbolic offsets. |
1029 | const FieldRegion *RightFR = dyn_cast<FieldRegion>(Val: RightMR); |
1030 | const FieldRegion *LeftFR = dyn_cast<FieldRegion>(Val: LeftMR); |
1031 | if (RightFR && LeftFR) { |
1032 | SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy, |
1033 | SVB&: *this); |
1034 | if (!R.isUnknown()) |
1035 | return R; |
1036 | } |
1037 | |
1038 | // Compare the regions using the raw offsets. |
1039 | RegionOffset LeftOffset = LeftMR->getAsOffset(); |
1040 | RegionOffset RightOffset = RightMR->getAsOffset(); |
1041 | |
1042 | if (LeftOffset.getRegion() != nullptr && |
1043 | LeftOffset.getRegion() == RightOffset.getRegion() && |
1044 | !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) { |
1045 | int64_t left = LeftOffset.getOffset(); |
1046 | int64_t right = RightOffset.getOffset(); |
1047 | |
1048 | switch (op) { |
1049 | default: |
1050 | return UnknownVal(); |
1051 | case BO_LT: |
1052 | return makeTruthVal(left < right, resultTy); |
1053 | case BO_GT: |
1054 | return makeTruthVal(left > right, resultTy); |
1055 | case BO_LE: |
1056 | return makeTruthVal(left <= right, resultTy); |
1057 | case BO_GE: |
1058 | return makeTruthVal(left >= right, resultTy); |
1059 | case BO_EQ: |
1060 | return makeTruthVal(left == right, resultTy); |
1061 | case BO_NE: |
1062 | return makeTruthVal(left != right, resultTy); |
1063 | } |
1064 | } |
1065 | |
1066 | // At this point we're not going to get a good answer, but we can try |
1067 | // conjuring an expression instead. |
1068 | SymbolRef LHSSym = lhs.getAsLocSymbol(); |
1069 | SymbolRef RHSSym = rhs.getAsLocSymbol(); |
1070 | if (LHSSym && RHSSym) |
1071 | return makeNonLoc(LHSSym, op, RHSSym, resultTy); |
1072 | |
1073 | // If we get here, we have no way of comparing the regions. |
1074 | return UnknownVal(); |
1075 | } |
1076 | } |
1077 | } |
1078 | |
1079 | SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state, |
1080 | BinaryOperator::Opcode op, Loc lhs, |
1081 | NonLoc rhs, QualType resultTy) { |
1082 | if (op >= BO_PtrMemD && op <= BO_PtrMemI) { |
1083 | if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) { |
1084 | if (PTMSV->isNullMemberPointer()) |
1085 | return UndefinedVal(); |
1086 | |
1087 | auto getFieldLValue = [&](const auto *FD) -> SVal { |
1088 | SVal Result = lhs; |
1089 | |
1090 | for (const auto &I : *PTMSV) |
1091 | Result = StateMgr.getStoreManager().evalDerivedToBase( |
1092 | Derived: Result, DerivedPtrType: I->getType(), IsVirtual: I->isVirtual()); |
1093 | |
1094 | return state->getLValue(FD, Result); |
1095 | }; |
1096 | |
1097 | if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) { |
1098 | return getFieldLValue(FD); |
1099 | } |
1100 | if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) { |
1101 | return getFieldLValue(FD); |
1102 | } |
1103 | } |
1104 | |
1105 | return rhs; |
1106 | } |
1107 | |
1108 | assert(!BinaryOperator::isComparisonOp(op) && |
1109 | "arguments to comparison ops must be of the same type" ); |
1110 | |
1111 | // Special case: rhs is a zero constant. |
1112 | if (rhs.isZeroConstant()) |
1113 | return lhs; |
1114 | |
1115 | // Perserve the null pointer so that it can be found by the DerefChecker. |
1116 | if (lhs.isZeroConstant()) |
1117 | return lhs; |
1118 | |
1119 | // We are dealing with pointer arithmetic. |
1120 | |
1121 | // Handle pointer arithmetic on constant values. |
1122 | if (std::optional<nonloc::ConcreteInt> rhsInt = |
1123 | rhs.getAs<nonloc::ConcreteInt>()) { |
1124 | if (std::optional<loc::ConcreteInt> lhsInt = |
1125 | lhs.getAs<loc::ConcreteInt>()) { |
1126 | const llvm::APSInt &leftI = lhsInt->getValue(); |
1127 | assert(leftI.isUnsigned()); |
1128 | llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true); |
1129 | |
1130 | // Convert the bitwidth of rightI. This should deal with overflow |
1131 | // since we are dealing with concrete values. |
1132 | rightI = rightI.extOrTrunc(width: leftI.getBitWidth()); |
1133 | |
1134 | // Offset the increment by the pointer size. |
1135 | llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true); |
1136 | QualType pointeeType = resultTy->getPointeeType(); |
1137 | Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity(); |
1138 | rightI *= Multiplicand; |
1139 | |
1140 | // Compute the adjusted pointer. |
1141 | switch (op) { |
1142 | case BO_Add: |
1143 | rightI = leftI + rightI; |
1144 | break; |
1145 | case BO_Sub: |
1146 | rightI = leftI - rightI; |
1147 | break; |
1148 | default: |
1149 | llvm_unreachable("Invalid pointer arithmetic operation" ); |
1150 | } |
1151 | return loc::ConcreteInt(getBasicValueFactory().getValue(rightI)); |
1152 | } |
1153 | } |
1154 | |
1155 | // Handle cases where 'lhs' is a region. |
1156 | if (const MemRegion *region = lhs.getAsRegion()) { |
1157 | rhs = convertToArrayIndex(rhs).castAs<NonLoc>(); |
1158 | SVal index = UnknownVal(); |
1159 | const SubRegion *superR = nullptr; |
1160 | // We need to know the type of the pointer in order to add an integer to it. |
1161 | // Depending on the type, different amount of bytes is added. |
1162 | QualType elementType; |
1163 | |
1164 | if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(Val: region)) { |
1165 | assert(op == BO_Add || op == BO_Sub); |
1166 | index = evalBinOpNN(state, op, lhs: elemReg->getIndex(), rhs, |
1167 | resultTy: getArrayIndexType()); |
1168 | superR = cast<SubRegion>(elemReg->getSuperRegion()); |
1169 | elementType = elemReg->getElementType(); |
1170 | } |
1171 | else if (isa<SubRegion>(Val: region)) { |
1172 | assert(op == BO_Add || op == BO_Sub); |
1173 | index = (op == BO_Add) ? rhs : evalMinus(rhs); |
1174 | superR = cast<SubRegion>(Val: region); |
1175 | // TODO: Is this actually reliable? Maybe improving our MemRegion |
1176 | // hierarchy to provide typed regions for all non-void pointers would be |
1177 | // better. For instance, we cannot extend this towards LocAsInteger |
1178 | // operations, where result type of the expression is integer. |
1179 | if (resultTy->isAnyPointerType()) |
1180 | elementType = resultTy->getPointeeType(); |
1181 | } |
1182 | |
1183 | // Represent arithmetic on void pointers as arithmetic on char pointers. |
1184 | // It is fine when a TypedValueRegion of char value type represents |
1185 | // a void pointer. Note that arithmetic on void pointers is a GCC extension. |
1186 | if (elementType->isVoidType()) |
1187 | elementType = getContext().CharTy; |
1188 | |
1189 | if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) { |
1190 | return loc::MemRegionVal(MemMgr.getElementRegion(elementType, Idx: *indexV, |
1191 | superRegion: superR, Ctx&: getContext())); |
1192 | } |
1193 | } |
1194 | return UnknownVal(); |
1195 | } |
1196 | |
1197 | const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state, |
1198 | SVal V) { |
1199 | if (const llvm::APSInt *Res = getConcreteValue(V)) |
1200 | return Res; |
1201 | |
1202 | if (SymbolRef Sym = V.getAsSymbol()) |
1203 | return state->getConstraintManager().getSymVal(state, sym: Sym); |
1204 | |
1205 | return nullptr; |
1206 | } |
1207 | |
1208 | const llvm::APSInt *SimpleSValBuilder::getConcreteValue(SVal V) { |
1209 | if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>()) |
1210 | return &X->getValue(); |
1211 | |
1212 | if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>()) |
1213 | return &X->getValue(); |
1214 | |
1215 | return nullptr; |
1216 | } |
1217 | |
1218 | const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, |
1219 | SVal V) { |
1220 | return getConstValue(state, V: simplifySVal(State: state, V)); |
1221 | } |
1222 | |
1223 | const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state, |
1224 | SVal V) { |
1225 | V = simplifySVal(State: state, V); |
1226 | |
1227 | if (const llvm::APSInt *Res = getConcreteValue(V)) |
1228 | return Res; |
1229 | |
1230 | if (SymbolRef Sym = V.getAsSymbol()) |
1231 | return state->getConstraintManager().getSymMinVal(state, sym: Sym); |
1232 | |
1233 | return nullptr; |
1234 | } |
1235 | |
1236 | const llvm::APSInt *SimpleSValBuilder::getMaxValue(ProgramStateRef state, |
1237 | SVal V) { |
1238 | V = simplifySVal(State: state, V); |
1239 | |
1240 | if (const llvm::APSInt *Res = getConcreteValue(V)) |
1241 | return Res; |
1242 | |
1243 | if (SymbolRef Sym = V.getAsSymbol()) |
1244 | return state->getConstraintManager().getSymMaxVal(state, sym: Sym); |
1245 | |
1246 | return nullptr; |
1247 | } |
1248 | |
1249 | SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) { |
1250 | SVal SimplifiedVal = simplifySValOnce(State, V: Val); |
1251 | while (SimplifiedVal != Val) { |
1252 | Val = SimplifiedVal; |
1253 | SimplifiedVal = simplifySValOnce(State, V: Val); |
1254 | } |
1255 | return SimplifiedVal; |
1256 | } |
1257 | |
1258 | SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) { |
1259 | return simplifyUntilFixpoint(State, Val: V); |
1260 | } |
1261 | |
1262 | SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) { |
1263 | // For now, this function tries to constant-fold symbols inside a |
1264 | // nonloc::SymbolVal, and does nothing else. More simplifications should |
1265 | // be possible, such as constant-folding an index in an ElementRegion. |
1266 | |
1267 | class Simplifier : public FullSValVisitor<Simplifier, SVal> { |
1268 | ProgramStateRef State; |
1269 | SValBuilder &SVB; |
1270 | |
1271 | // Cache results for the lifetime of the Simplifier. Results change every |
1272 | // time new constraints are added to the program state, which is the whole |
1273 | // point of simplifying, and for that very reason it's pointless to maintain |
1274 | // the same cache for the duration of the whole analysis. |
1275 | llvm::DenseMap<SymbolRef, SVal> Cached; |
1276 | |
1277 | static bool isUnchanged(SymbolRef Sym, SVal Val) { |
1278 | return Sym == Val.getAsSymbol(); |
1279 | } |
1280 | |
1281 | SVal cache(SymbolRef Sym, SVal V) { |
1282 | Cached[Sym] = V; |
1283 | return V; |
1284 | } |
1285 | |
1286 | SVal skip(SymbolRef Sym) { |
1287 | return cache(Sym, V: SVB.makeSymbolVal(Sym)); |
1288 | } |
1289 | |
1290 | // Return the known const value for the Sym if available, or return Undef |
1291 | // otherwise. |
1292 | SVal getConst(SymbolRef Sym) { |
1293 | const llvm::APSInt *Const = |
1294 | State->getConstraintManager().getSymVal(state: State, sym: Sym); |
1295 | if (Const) |
1296 | return Loc::isLocType(T: Sym->getType()) ? (SVal)SVB.makeIntLocVal(integer: *Const) |
1297 | : (SVal)SVB.makeIntVal(integer: *Const); |
1298 | return UndefinedVal(); |
1299 | } |
1300 | |
1301 | SVal getConstOrVisit(SymbolRef Sym) { |
1302 | const SVal Ret = getConst(Sym); |
1303 | if (Ret.isUndef()) |
1304 | return Visit(S: Sym); |
1305 | return Ret; |
1306 | } |
1307 | |
1308 | public: |
1309 | Simplifier(ProgramStateRef State) |
1310 | : State(State), SVB(State->getStateManager().getSValBuilder()) {} |
1311 | |
1312 | SVal VisitSymbolData(const SymbolData *S) { |
1313 | // No cache here. |
1314 | if (const llvm::APSInt *I = |
1315 | State->getConstraintManager().getSymVal(state: State, sym: S)) |
1316 | return Loc::isLocType(T: S->getType()) ? (SVal)SVB.makeIntLocVal(integer: *I) |
1317 | : (SVal)SVB.makeIntVal(integer: *I); |
1318 | return SVB.makeSymbolVal(Sym: S); |
1319 | } |
1320 | |
1321 | SVal VisitSymIntExpr(const SymIntExpr *S) { |
1322 | auto I = Cached.find(S); |
1323 | if (I != Cached.end()) |
1324 | return I->second; |
1325 | |
1326 | SVal LHS = getConstOrVisit(Sym: S->getLHS()); |
1327 | if (isUnchanged(Sym: S->getLHS(), Val: LHS)) |
1328 | return skip(S); |
1329 | |
1330 | SVal RHS; |
1331 | // By looking at the APSInt in the right-hand side of S, we cannot |
1332 | // figure out if it should be treated as a Loc or as a NonLoc. |
1333 | // So make our guess by recalling that we cannot multiply pointers |
1334 | // or compare a pointer to an integer. |
1335 | if (Loc::isLocType(T: S->getLHS()->getType()) && |
1336 | BinaryOperator::isComparisonOp(S->getOpcode())) { |
1337 | // The usual conversion of $sym to &SymRegion{$sym}, as they have |
1338 | // the same meaning for Loc-type symbols, but the latter form |
1339 | // is preferred in SVal computations for being Loc itself. |
1340 | if (SymbolRef Sym = LHS.getAsSymbol()) { |
1341 | assert(Loc::isLocType(Sym->getType())); |
1342 | LHS = SVB.makeLoc(sym: Sym); |
1343 | } |
1344 | RHS = SVB.makeIntLocVal(integer: S->getRHS()); |
1345 | } else { |
1346 | RHS = SVB.makeIntVal(integer: S->getRHS()); |
1347 | } |
1348 | |
1349 | return cache( |
1350 | Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType())); |
1351 | } |
1352 | |
1353 | SVal VisitIntSymExpr(const IntSymExpr *S) { |
1354 | auto I = Cached.find(S); |
1355 | if (I != Cached.end()) |
1356 | return I->second; |
1357 | |
1358 | SVal RHS = getConstOrVisit(Sym: S->getRHS()); |
1359 | if (isUnchanged(Sym: S->getRHS(), Val: RHS)) |
1360 | return skip(S); |
1361 | |
1362 | SVal LHS = SVB.makeIntVal(integer: S->getLHS()); |
1363 | return cache( |
1364 | Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType())); |
1365 | } |
1366 | |
1367 | SVal VisitSymSymExpr(const SymSymExpr *S) { |
1368 | auto I = Cached.find(S); |
1369 | if (I != Cached.end()) |
1370 | return I->second; |
1371 | |
1372 | // For now don't try to simplify mixed Loc/NonLoc expressions |
1373 | // because they often appear from LocAsInteger operations |
1374 | // and we don't know how to combine a LocAsInteger |
1375 | // with a concrete value. |
1376 | if (Loc::isLocType(T: S->getLHS()->getType()) != |
1377 | Loc::isLocType(T: S->getRHS()->getType())) |
1378 | return skip(S); |
1379 | |
1380 | SVal LHS = getConstOrVisit(Sym: S->getLHS()); |
1381 | SVal RHS = getConstOrVisit(Sym: S->getRHS()); |
1382 | |
1383 | if (isUnchanged(Sym: S->getLHS(), Val: LHS) && isUnchanged(Sym: S->getRHS(), Val: RHS)) |
1384 | return skip(S); |
1385 | |
1386 | return cache( |
1387 | Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType())); |
1388 | } |
1389 | |
1390 | SVal VisitSymbolCast(const SymbolCast *S) { |
1391 | auto I = Cached.find(S); |
1392 | if (I != Cached.end()) |
1393 | return I->second; |
1394 | const SymExpr *OpSym = S->getOperand(); |
1395 | SVal OpVal = getConstOrVisit(Sym: OpSym); |
1396 | if (isUnchanged(Sym: OpSym, Val: OpVal)) |
1397 | return skip(S); |
1398 | |
1399 | return cache(S, SVB.evalCast(V: OpVal, CastTy: S->getType(), OriginalTy: OpSym->getType())); |
1400 | } |
1401 | |
1402 | SVal VisitUnarySymExpr(const UnarySymExpr *S) { |
1403 | auto I = Cached.find(S); |
1404 | if (I != Cached.end()) |
1405 | return I->second; |
1406 | SVal Op = getConstOrVisit(Sym: S->getOperand()); |
1407 | if (isUnchanged(Sym: S->getOperand(), Val: Op)) |
1408 | return skip(S); |
1409 | |
1410 | return cache( |
1411 | S, SVB.evalUnaryOp(state: State, opc: S->getOpcode(), operand: Op, type: S->getType())); |
1412 | } |
1413 | |
1414 | SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); } |
1415 | |
1416 | SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); } |
1417 | |
1418 | SVal VisitSymbolVal(nonloc::SymbolVal V) { |
1419 | // Simplification is much more costly than computing complexity. |
1420 | // For high complexity, it may be not worth it. |
1421 | return Visit(S: V.getSymbol()); |
1422 | } |
1423 | |
1424 | SVal VisitSVal(SVal V) { return V; } |
1425 | }; |
1426 | |
1427 | SVal SimplifiedV = Simplifier(State).Visit(V); |
1428 | return SimplifiedV; |
1429 | } |
1430 | |