1 | // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*- |
---|---|

2 | // |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |

4 | // See https://llvm.org/LICENSE.txt for license information. |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |

6 | // |

7 | //===----------------------------------------------------------------------===// |

8 | // |

9 | // This file defines SimpleSValBuilder, a basic implementation of SValBuilder. |

10 | // |

11 | //===----------------------------------------------------------------------===// |

12 | |

13 | #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" |

14 | #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" |

15 | #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" |

16 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" |

17 | #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" |

18 | |

19 | using namespace clang; |

20 | using namespace ento; |

21 | |

22 | namespace { |

23 | class SimpleSValBuilder : public SValBuilder { |

24 | |

25 | // Query the constraint manager whether the SVal has only one possible |

26 | // (integer) value. If that is the case, the value is returned. Otherwise, |

27 | // returns NULL. |

28 | // This is an implementation detail. Checkers should use `getKnownValue()` |

29 | // instead. |

30 | const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V); |

31 | |

32 | // With one `simplifySValOnce` call, a compound symbols might collapse to |

33 | // simpler symbol tree that is still possible to further simplify. Thus, we |

34 | // do the simplification on a new symbol tree until we reach the simplest |

35 | // form, i.e. the fixpoint. |

36 | // Consider the following symbol `(b * b) * b * b` which has this tree: |

37 | // * |

38 | // / \ |

39 | // * b |

40 | // / \ |

41 | // / b |

42 | // (b * b) |

43 | // Now, if the `b * b == 1` new constraint is added then during the first |

44 | // iteration we have the following transformations: |

45 | // * * |

46 | // / \ / \ |

47 | // * b --> b b |

48 | // / \ |

49 | // / b |

50 | // 1 |

51 | // We need another iteration to reach the final result `1`. |

52 | SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val); |

53 | |

54 | // Recursively descends into symbolic expressions and replaces symbols |

55 | // with their known values (in the sense of the getConstValue() method). |

56 | // We traverse the symbol tree and query the constraint values for the |

57 | // sub-trees and if a value is a constant we do the constant folding. |

58 | SVal simplifySValOnce(ProgramStateRef State, SVal V); |

59 | |

60 | public: |

61 | SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, |

62 | ProgramStateManager &stateMgr) |

63 | : SValBuilder(alloc, context, stateMgr) {} |

64 | ~SimpleSValBuilder() override {} |

65 | |

66 | SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, |

67 | NonLoc lhs, NonLoc rhs, QualType resultTy) override; |

68 | SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, |

69 | Loc lhs, Loc rhs, QualType resultTy) override; |

70 | SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, |

71 | Loc lhs, NonLoc rhs, QualType resultTy) override; |

72 | |

73 | /// Evaluates a given SVal by recursively evaluating and |

74 | /// simplifying the children SVals. If the SVal has only one possible |

75 | /// (integer) value, that value is returned. Otherwise, returns NULL. |

76 | const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override; |

77 | |

78 | SVal simplifySVal(ProgramStateRef State, SVal V) override; |

79 | |

80 | SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op, |

81 | const llvm::APSInt &RHS, QualType resultTy); |

82 | }; |

83 | } // end anonymous namespace |

84 | |

85 | SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, |

86 | ASTContext &context, |

87 | ProgramStateManager &stateMgr) { |

88 | return new SimpleSValBuilder(alloc, context, stateMgr); |

89 | } |

90 | |

91 | // Checks if the negation the value and flipping sign preserve |

92 | // the semantics on the operation in the resultType |

93 | static bool isNegationValuePreserving(const llvm::APSInt &Value, |

94 | APSIntType ResultType) { |

95 | const unsigned ValueBits = Value.getSignificantBits(); |

96 | if (ValueBits == ResultType.getBitWidth()) { |

97 | // The value is the lowest negative value that is representable |

98 | // in signed integer with bitWith of result type. The |

99 | // negation is representable if resultType is unsigned. |

100 | return ResultType.isUnsigned(); |

101 | } |

102 | |

103 | // If resultType bitWith is higher that number of bits required |

104 | // to represent RHS, the sign flip produce same value. |

105 | return ValueBits < ResultType.getBitWidth(); |

106 | } |

107 | |

108 | //===----------------------------------------------------------------------===// |

109 | // Transfer function for binary operators. |

110 | //===----------------------------------------------------------------------===// |

111 | |

112 | SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS, |

113 | BinaryOperator::Opcode op, |

114 | const llvm::APSInt &RHS, |

115 | QualType resultTy) { |

116 | bool isIdempotent = false; |

117 | |

118 | // Check for a few special cases with known reductions first. |

119 | switch (op) { |

120 | default: |

121 | // We can't reduce this case; just treat it normally. |

122 | break; |

123 | case BO_Mul: |

124 | // a*0 and a*1 |

125 | if (RHS == 0) |

126 | return makeIntVal(0, resultTy); |

127 | else if (RHS == 1) |

128 | isIdempotent = true; |

129 | break; |

130 | case BO_Div: |

131 | // a/0 and a/1 |

132 | if (RHS == 0) |

133 | // This is also handled elsewhere. |

134 | return UndefinedVal(); |

135 | else if (RHS == 1) |

136 | isIdempotent = true; |

137 | break; |

138 | case BO_Rem: |

139 | // a%0 and a%1 |

140 | if (RHS == 0) |

141 | // This is also handled elsewhere. |

142 | return UndefinedVal(); |

143 | else if (RHS == 1) |

144 | return makeIntVal(0, resultTy); |

145 | break; |

146 | case BO_Add: |

147 | case BO_Sub: |

148 | case BO_Shl: |

149 | case BO_Shr: |

150 | case BO_Xor: |

151 | // a+0, a-0, a<<0, a>>0, a^0 |

152 | if (RHS == 0) |

153 | isIdempotent = true; |

154 | break; |

155 | case BO_And: |

156 | // a&0 and a&(~0) |

157 | if (RHS == 0) |

158 | return makeIntVal(0, resultTy); |

159 | else if (RHS.isAllOnes()) |

160 | isIdempotent = true; |

161 | break; |

162 | case BO_Or: |

163 | // a|0 and a|(~0) |

164 | if (RHS == 0) |

165 | isIdempotent = true; |

166 | else if (RHS.isAllOnes()) { |

167 | const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS); |

168 | return nonloc::ConcreteInt(Result); |

169 | } |

170 | break; |

171 | } |

172 | |

173 | // Idempotent ops (like a*1) can still change the type of an expression. |

174 | // Wrap the LHS up in a NonLoc again and let evalCast do the |

175 | // dirty work. |

176 | if (isIdempotent) |

177 | return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{}); |

178 | |

179 | // If we reach this point, the expression cannot be simplified. |

180 | // Make a SymbolVal for the entire expression, after converting the RHS. |

181 | const llvm::APSInt *ConvertedRHS = &RHS; |

182 | if (BinaryOperator::isComparisonOp(op)) { |

183 | // We're looking for a type big enough to compare the symbolic value |

184 | // with the given constant. |

185 | // FIXME: This is an approximation of Sema::UsualArithmeticConversions. |

186 | ASTContext &Ctx = getContext(); |

187 | QualType SymbolType = LHS->getType(); |

188 | uint64_t ValWidth = RHS.getBitWidth(); |

189 | uint64_t TypeWidth = Ctx.getTypeSize(SymbolType); |

190 | |

191 | if (ValWidth < TypeWidth) { |

192 | // If the value is too small, extend it. |

193 | ConvertedRHS = &BasicVals.Convert(SymbolType, RHS); |

194 | } else if (ValWidth == TypeWidth) { |

195 | // If the value is signed but the symbol is unsigned, do the comparison |

196 | // in unsigned space. [C99 6.3.1.8] |

197 | // (For the opposite case, the value is already unsigned.) |

198 | if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType()) |

199 | ConvertedRHS = &BasicVals.Convert(SymbolType, RHS); |

200 | } |

201 | } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) { |

202 | // Change a+(-N) into a-N, and a-(-N) into a+N |

203 | // Adjust addition/subtraction of negative value, to |

204 | // subtraction/addition of the negated value. |

205 | APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy); |

206 | if (isNegationValuePreserving(RHS, resultIntTy)) { |

207 | ConvertedRHS = &BasicVals.getValue(-resultIntTy.convert(RHS)); |

208 | op = (op == BO_Add) ? BO_Sub : BO_Add; |

209 | } else { |

210 | ConvertedRHS = &BasicVals.Convert(resultTy, RHS); |

211 | } |

212 | } else |

213 | ConvertedRHS = &BasicVals.Convert(resultTy, RHS); |

214 | |

215 | return makeNonLoc(LHS, op, *ConvertedRHS, resultTy); |

216 | } |

217 | |

218 | // See if Sym is known to be a relation Rel with Bound. |

219 | static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym, |

220 | llvm::APSInt Bound, ProgramStateRef State) { |

221 | SValBuilder &SVB = State->getStateManager().getSValBuilder(); |

222 | SVal Result = |

223 | SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym), |

224 | nonloc::ConcreteInt(Bound), SVB.getConditionType()); |

225 | if (auto DV = Result.getAs<DefinedSVal>()) { |

226 | return !State->assume(*DV, false); |

227 | } |

228 | return false; |

229 | } |

230 | |

231 | // See if Sym is known to be within [min/4, max/4], where min and max |

232 | // are the bounds of the symbol's integral type. With such symbols, |

233 | // some manipulations can be performed without the risk of overflow. |

234 | // assume() doesn't cause infinite recursion because we should be dealing |

235 | // with simpler symbols on every recursive call. |

236 | static bool isWithinConstantOverflowBounds(SymbolRef Sym, |

237 | ProgramStateRef State) { |

238 | SValBuilder &SVB = State->getStateManager().getSValBuilder(); |

239 | BasicValueFactory &BV = SVB.getBasicValueFactory(); |

240 | |

241 | QualType T = Sym->getType(); |

242 | assert(T->isSignedIntegerOrEnumerationType() && |

243 | "This only works with signed integers!"); |

244 | APSIntType AT = BV.getAPSIntType(T); |

245 | |

246 | llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max; |

247 | return isInRelation(BO_LE, Sym, Max, State) && |

248 | isInRelation(BO_GE, Sym, Min, State); |

249 | } |

250 | |

251 | // Same for the concrete integers: see if I is within [min/4, max/4]. |

252 | static bool isWithinConstantOverflowBounds(llvm::APSInt I) { |

253 | APSIntType AT(I); |

254 | assert(!AT.isUnsigned() && |

255 | "This only works with signed integers!"); |

256 | |

257 | llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max; |

258 | return (I <= Max) && (I >= -Max); |

259 | } |

260 | |

261 | static std::pair<SymbolRef, llvm::APSInt> |

262 | decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) { |

263 | if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym)) |

264 | if (BinaryOperator::isAdditiveOp(SymInt->getOpcode())) |

265 | return std::make_pair(SymInt->getLHS(), |

266 | (SymInt->getOpcode() == BO_Add) ? |

267 | (SymInt->getRHS()) : |

268 | (-SymInt->getRHS())); |

269 | |

270 | // Fail to decompose: "reduce" the problem to the "$x + 0" case. |

271 | return std::make_pair(Sym, BV.getValue(0, Sym->getType())); |

272 | } |

273 | |

274 | // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the |

275 | // same signed integral type and no overflows occur (which should be checked |

276 | // by the caller). |

277 | static NonLoc doRearrangeUnchecked(ProgramStateRef State, |

278 | BinaryOperator::Opcode Op, |

279 | SymbolRef LSym, llvm::APSInt LInt, |

280 | SymbolRef RSym, llvm::APSInt RInt) { |

281 | SValBuilder &SVB = State->getStateManager().getSValBuilder(); |

282 | BasicValueFactory &BV = SVB.getBasicValueFactory(); |

283 | SymbolManager &SymMgr = SVB.getSymbolManager(); |

284 | |

285 | QualType SymTy = LSym->getType(); |

286 | assert(SymTy == RSym->getType() && |

287 | "Symbols are not of the same type!"); |

288 | assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) && |

289 | "Integers are not of the same type as symbols!"); |

290 | assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) && |

291 | "Integers are not of the same type as symbols!"); |

292 | |

293 | QualType ResultTy; |

294 | if (BinaryOperator::isComparisonOp(Op)) |

295 | ResultTy = SVB.getConditionType(); |

296 | else if (BinaryOperator::isAdditiveOp(Op)) |

297 | ResultTy = SymTy; |

298 | else |

299 | llvm_unreachable("Operation not suitable for unchecked rearrangement!"); |

300 | |

301 | if (LSym == RSym) |

302 | return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt), |

303 | nonloc::ConcreteInt(RInt), ResultTy) |

304 | .castAs<NonLoc>(); |

305 | |

306 | SymbolRef ResultSym = nullptr; |

307 | BinaryOperator::Opcode ResultOp; |

308 | llvm::APSInt ResultInt; |

309 | if (BinaryOperator::isComparisonOp(Op)) { |

310 | // Prefer comparing to a non-negative number. |

311 | // FIXME: Maybe it'd be better to have consistency in |

312 | // "$x - $y" vs. "$y - $x" because those are solver's keys. |

313 | if (LInt > RInt) { |

314 | ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy); |

315 | ResultOp = BinaryOperator::reverseComparisonOp(Op); |

316 | ResultInt = LInt - RInt; // Opposite order! |

317 | } else { |

318 | ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy); |

319 | ResultOp = Op; |

320 | ResultInt = RInt - LInt; // Opposite order! |

321 | } |

322 | } else { |

323 | ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy); |

324 | ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt); |

325 | ResultOp = BO_Add; |

326 | // Bring back the cosmetic difference. |

327 | if (ResultInt < 0) { |

328 | ResultInt = -ResultInt; |

329 | ResultOp = BO_Sub; |

330 | } else if (ResultInt == 0) { |

331 | // Shortcut: Simplify "$x + 0" to "$x". |

332 | return nonloc::SymbolVal(ResultSym); |

333 | } |

334 | } |

335 | const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt); |

336 | return nonloc::SymbolVal( |

337 | SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy)); |

338 | } |

339 | |

340 | // Rearrange if symbol type matches the result type and if the operator is a |

341 | // comparison operator, both symbol and constant must be within constant |

342 | // overflow bounds. |

343 | static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, |

344 | SymbolRef Sym, llvm::APSInt Int, QualType Ty) { |

345 | return Sym->getType() == Ty && |

346 | (!BinaryOperator::isComparisonOp(Op) || |

347 | (isWithinConstantOverflowBounds(Sym, State) && |

348 | isWithinConstantOverflowBounds(Int))); |

349 | } |

350 | |

351 | static Optional<NonLoc> tryRearrange(ProgramStateRef State, |

352 | BinaryOperator::Opcode Op, NonLoc Lhs, |

353 | NonLoc Rhs, QualType ResultTy) { |

354 | ProgramStateManager &StateMgr = State->getStateManager(); |

355 | SValBuilder &SVB = StateMgr.getSValBuilder(); |

356 | |

357 | // We expect everything to be of the same type - this type. |

358 | QualType SingleTy; |

359 | |

360 | // FIXME: After putting complexity threshold to the symbols we can always |

361 | // rearrange additive operations but rearrange comparisons only if |

362 | // option is set. |

363 | if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation) |

364 | return None; |

365 | |

366 | SymbolRef LSym = Lhs.getAsSymbol(); |

367 | if (!LSym) |

368 | return None; |

369 | |

370 | if (BinaryOperator::isComparisonOp(Op)) { |

371 | SingleTy = LSym->getType(); |

372 | if (ResultTy != SVB.getConditionType()) |

373 | return None; |

374 | // Initialize SingleTy later with a symbol's type. |

375 | } else if (BinaryOperator::isAdditiveOp(Op)) { |

376 | SingleTy = ResultTy; |

377 | if (LSym->getType() != SingleTy) |

378 | return None; |

379 | } else { |

380 | // Don't rearrange other operations. |

381 | return None; |

382 | } |

383 | |

384 | assert(!SingleTy.isNull() && "We should have figured out the type by now!"); |

385 | |

386 | // Rearrange signed symbolic expressions only |

387 | if (!SingleTy->isSignedIntegerOrEnumerationType()) |

388 | return None; |

389 | |

390 | SymbolRef RSym = Rhs.getAsSymbol(); |

391 | if (!RSym || RSym->getType() != SingleTy) |

392 | return None; |

393 | |

394 | BasicValueFactory &BV = State->getBasicVals(); |

395 | llvm::APSInt LInt, RInt; |

396 | std::tie(LSym, LInt) = decomposeSymbol(LSym, BV); |

397 | std::tie(RSym, RInt) = decomposeSymbol(RSym, BV); |

398 | if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) || |

399 | !shouldRearrange(State, Op, RSym, RInt, SingleTy)) |

400 | return None; |

401 | |

402 | // We know that no overflows can occur anymore. |

403 | return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt); |

404 | } |

405 | |

406 | SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, |

407 | BinaryOperator::Opcode op, |

408 | NonLoc lhs, NonLoc rhs, |

409 | QualType resultTy) { |

410 | NonLoc InputLHS = lhs; |

411 | NonLoc InputRHS = rhs; |

412 | |

413 | // Constraints may have changed since the creation of a bound SVal. Check if |

414 | // the values can be simplified based on those new constraints. |

415 | SVal simplifiedLhs = simplifySVal(state, lhs); |

416 | SVal simplifiedRhs = simplifySVal(state, rhs); |

417 | if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>()) |

418 | lhs = *simplifiedLhsAsNonLoc; |

419 | if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>()) |

420 | rhs = *simplifiedRhsAsNonLoc; |

421 | |

422 | // Handle trivial case where left-side and right-side are the same. |

423 | if (lhs == rhs) |

424 | switch (op) { |

425 | default: |

426 | break; |

427 | case BO_EQ: |

428 | case BO_LE: |

429 | case BO_GE: |

430 | return makeTruthVal(true, resultTy); |

431 | case BO_LT: |

432 | case BO_GT: |

433 | case BO_NE: |

434 | return makeTruthVal(false, resultTy); |

435 | case BO_Xor: |

436 | case BO_Sub: |

437 | if (resultTy->isIntegralOrEnumerationType()) |

438 | return makeIntVal(0, resultTy); |

439 | return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy, |

440 | QualType{}); |

441 | case BO_Or: |

442 | case BO_And: |

443 | return evalCast(lhs, resultTy, QualType{}); |

444 | } |

445 | |

446 | while (true) { |

447 | switch (lhs.getSubKind()) { |

448 | default: |

449 | return makeSymExprValNN(op, lhs, rhs, resultTy); |

450 | case nonloc::PointerToMemberKind: { |

451 | assert(rhs.getSubKind() == nonloc::PointerToMemberKind && |

452 | "Both SVals should have pointer-to-member-type"); |

453 | auto LPTM = lhs.castAs<nonloc::PointerToMember>(), |

454 | RPTM = rhs.castAs<nonloc::PointerToMember>(); |

455 | auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData(); |

456 | switch (op) { |

457 | case BO_EQ: |

458 | return makeTruthVal(LPTMD == RPTMD, resultTy); |

459 | case BO_NE: |

460 | return makeTruthVal(LPTMD != RPTMD, resultTy); |

461 | default: |

462 | return UnknownVal(); |

463 | } |

464 | } |

465 | case nonloc::LocAsIntegerKind: { |

466 | Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc(); |

467 | switch (rhs.getSubKind()) { |

468 | case nonloc::LocAsIntegerKind: |

469 | // FIXME: at the moment the implementation |

470 | // of modeling "pointers as integers" is not complete. |

471 | if (!BinaryOperator::isComparisonOp(op)) |

472 | return UnknownVal(); |

473 | return evalBinOpLL(state, op, lhsL, |

474 | rhs.castAs<nonloc::LocAsInteger>().getLoc(), |

475 | resultTy); |

476 | case nonloc::ConcreteIntKind: { |

477 | // FIXME: at the moment the implementation |

478 | // of modeling "pointers as integers" is not complete. |

479 | if (!BinaryOperator::isComparisonOp(op)) |

480 | return UnknownVal(); |

481 | // Transform the integer into a location and compare. |

482 | // FIXME: This only makes sense for comparisons. If we want to, say, |

483 | // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it, |

484 | // then pack it back into a LocAsInteger. |

485 | llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue(); |

486 | // If the region has a symbolic base, pay attention to the type; it |

487 | // might be coming from a non-default address space. For non-symbolic |

488 | // regions it doesn't matter that much because such comparisons would |

489 | // most likely evaluate to concrete false anyway. FIXME: We might |

490 | // still need to handle the non-comparison case. |

491 | if (SymbolRef lSym = lhs.getAsLocSymbol(true)) |

492 | BasicVals.getAPSIntType(lSym->getType()).apply(i); |

493 | else |

494 | BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i); |

495 | return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy); |

496 | } |

497 | default: |

498 | switch (op) { |

499 | case BO_EQ: |

500 | return makeTruthVal(false, resultTy); |

501 | case BO_NE: |

502 | return makeTruthVal(true, resultTy); |

503 | default: |

504 | // This case also handles pointer arithmetic. |

505 | return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); |

506 | } |

507 | } |

508 | } |

509 | case nonloc::ConcreteIntKind: { |

510 | llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue(); |

511 | |

512 | // If we're dealing with two known constants, just perform the operation. |

513 | if (const llvm::APSInt *KnownRHSValue = getConstValue(state, rhs)) { |

514 | llvm::APSInt RHSValue = *KnownRHSValue; |

515 | if (BinaryOperator::isComparisonOp(op)) { |

516 | // We're looking for a type big enough to compare the two values. |

517 | // FIXME: This is not correct. char + short will result in a promotion |

518 | // to int. Unfortunately we have lost types by this point. |

519 | APSIntType CompareType = std::max(APSIntType(LHSValue), |

520 | APSIntType(RHSValue)); |

521 | CompareType.apply(LHSValue); |

522 | CompareType.apply(RHSValue); |

523 | } else if (!BinaryOperator::isShiftOp(op)) { |

524 | APSIntType IntType = BasicVals.getAPSIntType(resultTy); |

525 | IntType.apply(LHSValue); |

526 | IntType.apply(RHSValue); |

527 | } |

528 | |

529 | const llvm::APSInt *Result = |

530 | BasicVals.evalAPSInt(op, LHSValue, RHSValue); |

531 | if (!Result) |

532 | return UndefinedVal(); |

533 | |

534 | return nonloc::ConcreteInt(*Result); |

535 | } |

536 | |

537 | // Swap the left and right sides and flip the operator if doing so |

538 | // allows us to better reason about the expression (this is a form |

539 | // of expression canonicalization). |

540 | // While we're at it, catch some special cases for non-commutative ops. |

541 | switch (op) { |

542 | case BO_LT: |

543 | case BO_GT: |

544 | case BO_LE: |

545 | case BO_GE: |

546 | op = BinaryOperator::reverseComparisonOp(op); |

547 | [[fallthrough]]; |

548 | case BO_EQ: |

549 | case BO_NE: |

550 | case BO_Add: |

551 | case BO_Mul: |

552 | case BO_And: |

553 | case BO_Xor: |

554 | case BO_Or: |

555 | std::swap(lhs, rhs); |

556 | continue; |

557 | case BO_Shr: |

558 | // (~0)>>a |

559 | if (LHSValue.isAllOnes() && LHSValue.isSigned()) |

560 | return evalCast(lhs, resultTy, QualType{}); |

561 | [[fallthrough]]; |

562 | case BO_Shl: |

563 | // 0<<a and 0>>a |

564 | if (LHSValue == 0) |

565 | return evalCast(lhs, resultTy, QualType{}); |

566 | return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); |

567 | case BO_Div: |

568 | // 0 / x == 0 |

569 | case BO_Rem: |

570 | // 0 % x == 0 |

571 | if (LHSValue == 0) |

572 | return makeZeroVal(resultTy); |

573 | [[fallthrough]]; |

574 | default: |

575 | return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); |

576 | } |

577 | } |

578 | case nonloc::SymbolValKind: { |

579 | // We only handle LHS as simple symbols or SymIntExprs. |

580 | SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol(); |

581 | |

582 | // LHS is a symbolic expression. |

583 | if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) { |

584 | |

585 | // Is this a logical not? (!x is represented as x == 0.) |

586 | if (op == BO_EQ && rhs.isZeroConstant()) { |

587 | // We know how to negate certain expressions. Simplify them here. |

588 | |

589 | BinaryOperator::Opcode opc = symIntExpr->getOpcode(); |

590 | switch (opc) { |

591 | default: |

592 | // We don't know how to negate this operation. |

593 | // Just handle it as if it were a normal comparison to 0. |

594 | break; |

595 | case BO_LAnd: |

596 | case BO_LOr: |

597 | llvm_unreachable("Logical operators handled by branching logic."); |

598 | case BO_Assign: |

599 | case BO_MulAssign: |

600 | case BO_DivAssign: |

601 | case BO_RemAssign: |

602 | case BO_AddAssign: |

603 | case BO_SubAssign: |

604 | case BO_ShlAssign: |

605 | case BO_ShrAssign: |

606 | case BO_AndAssign: |

607 | case BO_XorAssign: |

608 | case BO_OrAssign: |

609 | case BO_Comma: |

610 | llvm_unreachable("'=' and ',' operators handled by ExprEngine."); |

611 | case BO_PtrMemD: |

612 | case BO_PtrMemI: |

613 | llvm_unreachable("Pointer arithmetic not handled here."); |

614 | case BO_LT: |

615 | case BO_GT: |

616 | case BO_LE: |

617 | case BO_GE: |

618 | case BO_EQ: |

619 | case BO_NE: |

620 | assert(resultTy->isBooleanType() || |

621 | resultTy == getConditionType()); |

622 | assert(symIntExpr->getType()->isBooleanType() || |

623 | getContext().hasSameUnqualifiedType(symIntExpr->getType(), |

624 | getConditionType())); |

625 | // Negate the comparison and make a value. |

626 | opc = BinaryOperator::negateComparisonOp(opc); |

627 | return makeNonLoc(symIntExpr->getLHS(), opc, |

628 | symIntExpr->getRHS(), resultTy); |

629 | } |

630 | } |

631 | |

632 | // For now, only handle expressions whose RHS is a constant. |

633 | if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) { |

634 | // If both the LHS and the current expression are additive, |

635 | // fold their constants and try again. |

636 | if (BinaryOperator::isAdditiveOp(op)) { |

637 | BinaryOperator::Opcode lop = symIntExpr->getOpcode(); |

638 | if (BinaryOperator::isAdditiveOp(lop)) { |

639 | // Convert the two constants to a common type, then combine them. |

640 | |

641 | // resultTy may not be the best type to convert to, but it's |

642 | // probably the best choice in expressions with mixed type |

643 | // (such as x+1U+2LL). The rules for implicit conversions should |

644 | // choose a reasonable type to preserve the expression, and will |

645 | // at least match how the value is going to be used. |

646 | APSIntType IntType = BasicVals.getAPSIntType(resultTy); |

647 | const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS()); |

648 | const llvm::APSInt &second = IntType.convert(*RHSValue); |

649 | |

650 | // If the op and lop agrees, then we just need to |

651 | // sum the constants. Otherwise, we change to operation |

652 | // type if substraction would produce negative value |

653 | // (and cause overflow for unsigned integers), |

654 | // as consequence x+1U-10 produces x-9U, instead |

655 | // of x+4294967287U, that would be produced without this |

656 | // additional check. |

657 | const llvm::APSInt *newRHS; |

658 | if (lop == op) { |

659 | newRHS = BasicVals.evalAPSInt(BO_Add, first, second); |

660 | } else if (first >= second) { |

661 | newRHS = BasicVals.evalAPSInt(BO_Sub, first, second); |

662 | op = lop; |

663 | } else { |

664 | newRHS = BasicVals.evalAPSInt(BO_Sub, second, first); |

665 | } |

666 | |

667 | assert(newRHS && "Invalid operation despite common type!"); |

668 | rhs = nonloc::ConcreteInt(*newRHS); |

669 | lhs = nonloc::SymbolVal(symIntExpr->getLHS()); |

670 | continue; |

671 | } |

672 | } |

673 | |

674 | // Otherwise, make a SymIntExpr out of the expression. |

675 | return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy); |

676 | } |

677 | } |

678 | |

679 | // Is the RHS a constant? |

680 | if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) |

681 | return MakeSymIntVal(Sym, op, *RHSValue, resultTy); |

682 | |

683 | if (Optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy)) |

684 | return *V; |

685 | |

686 | // Give up -- this is not a symbolic expression we can handle. |

687 | return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); |

688 | } |

689 | } |

690 | } |

691 | } |

692 | |

693 | static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR, |

694 | const FieldRegion *RightFR, |

695 | BinaryOperator::Opcode op, |

696 | QualType resultTy, |

697 | SimpleSValBuilder &SVB) { |

698 | // Only comparisons are meaningful here! |

699 | if (!BinaryOperator::isComparisonOp(op)) |

700 | return UnknownVal(); |

701 | |

702 | // Next, see if the two FRs have the same super-region. |

703 | // FIXME: This doesn't handle casts yet, and simply stripping the casts |

704 | // doesn't help. |

705 | if (LeftFR->getSuperRegion() != RightFR->getSuperRegion()) |

706 | return UnknownVal(); |

707 | |

708 | const FieldDecl *LeftFD = LeftFR->getDecl(); |

709 | const FieldDecl *RightFD = RightFR->getDecl(); |

710 | const RecordDecl *RD = LeftFD->getParent(); |

711 | |

712 | // Make sure the two FRs are from the same kind of record. Just in case! |

713 | // FIXME: This is probably where inheritance would be a problem. |

714 | if (RD != RightFD->getParent()) |

715 | return UnknownVal(); |

716 | |

717 | // We know for sure that the two fields are not the same, since that |

718 | // would have given us the same SVal. |

719 | if (op == BO_EQ) |

720 | return SVB.makeTruthVal(false, resultTy); |

721 | if (op == BO_NE) |

722 | return SVB.makeTruthVal(true, resultTy); |

723 | |

724 | // Iterate through the fields and see which one comes first. |

725 | // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field |

726 | // members and the units in which bit-fields reside have addresses that |

727 | // increase in the order in which they are declared." |

728 | bool leftFirst = (op == BO_LT || op == BO_LE); |

729 | for (const auto *I : RD->fields()) { |

730 | if (I == LeftFD) |

731 | return SVB.makeTruthVal(leftFirst, resultTy); |

732 | if (I == RightFD) |

733 | return SVB.makeTruthVal(!leftFirst, resultTy); |

734 | } |

735 | |

736 | llvm_unreachable("Fields not found in parent record's definition"); |

737 | } |

738 | |

739 | // This is used in debug builds only for now because some downstream users |

740 | // may hit this assert in their subsequent merges. |

741 | // There are still places in the analyzer where equal bitwidth Locs |

742 | // are compared, and need to be found and corrected. Recent previous fixes have |

743 | // addressed the known problems of making NULLs with specific bitwidths |

744 | // for Loc comparisons along with deprecation of APIs for the same purpose. |

745 | // |

746 | static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc, |

747 | Loc LhsLoc) { |

748 | // Implements a "best effort" check for RhsLoc and LhsLoc bit widths |

749 | ASTContext &Ctx = State->getStateManager().getContext(); |

750 | uint64_t RhsBitwidth = |

751 | RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx)); |

752 | uint64_t LhsBitwidth = |

753 | LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx)); |

754 | if (RhsBitwidth && LhsBitwidth && |

755 | (LhsLoc.getSubKind() == RhsLoc.getSubKind())) { |

756 | assert(RhsBitwidth == LhsBitwidth && |

757 | "RhsLoc and LhsLoc bitwidth must be same!"); |

758 | } |

759 | } |

760 | |

761 | // FIXME: all this logic will change if/when we have MemRegion::getLocation(). |

762 | SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, |

763 | BinaryOperator::Opcode op, |

764 | Loc lhs, Loc rhs, |

765 | QualType resultTy) { |

766 | |

767 | // Assert that bitwidth of lhs and rhs are the same. |

768 | // This can happen if two different address spaces are used, |

769 | // and the bitwidths of the address spaces are different. |

770 | // See LIT case clang/test/Analysis/cstring-checker-addressspace.c |

771 | // FIXME: See comment above in the function assertEqualBitWidths |

772 | assertEqualBitWidths(state, rhs, lhs); |

773 | |

774 | // Only comparisons and subtractions are valid operations on two pointers. |

775 | // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15]. |

776 | // However, if a pointer is casted to an integer, evalBinOpNN may end up |

777 | // calling this function with another operation (PR7527). We don't attempt to |

778 | // model this for now, but it could be useful, particularly when the |

779 | // "location" is actually an integer value that's been passed through a void*. |

780 | if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub)) |

781 | return UnknownVal(); |

782 | |

783 | // Special cases for when both sides are identical. |

784 | if (lhs == rhs) { |

785 | switch (op) { |

786 | default: |

787 | llvm_unreachable("Unimplemented operation for two identical values"); |

788 | case BO_Sub: |

789 | return makeZeroVal(resultTy); |

790 | case BO_EQ: |

791 | case BO_LE: |

792 | case BO_GE: |

793 | return makeTruthVal(true, resultTy); |

794 | case BO_NE: |

795 | case BO_LT: |

796 | case BO_GT: |

797 | return makeTruthVal(false, resultTy); |

798 | } |

799 | } |

800 | |

801 | switch (lhs.getSubKind()) { |

802 | default: |

803 | llvm_unreachable("Ordering not implemented for this Loc."); |

804 | |

805 | case loc::GotoLabelKind: |

806 | // The only thing we know about labels is that they're non-null. |

807 | if (rhs.isZeroConstant()) { |

808 | switch (op) { |

809 | default: |

810 | break; |

811 | case BO_Sub: |

812 | return evalCast(lhs, resultTy, QualType{}); |

813 | case BO_EQ: |

814 | case BO_LE: |

815 | case BO_LT: |

816 | return makeTruthVal(false, resultTy); |

817 | case BO_NE: |

818 | case BO_GT: |

819 | case BO_GE: |

820 | return makeTruthVal(true, resultTy); |

821 | } |

822 | } |

823 | // There may be two labels for the same location, and a function region may |

824 | // have the same address as a label at the start of the function (depending |

825 | // on the ABI). |

826 | // FIXME: we can probably do a comparison against other MemRegions, though. |

827 | // FIXME: is there a way to tell if two labels refer to the same location? |

828 | return UnknownVal(); |

829 | |

830 | case loc::ConcreteIntKind: { |

831 | auto L = lhs.castAs<loc::ConcreteInt>(); |

832 | |

833 | // If one of the operands is a symbol and the other is a constant, |

834 | // build an expression for use by the constraint manager. |

835 | if (SymbolRef rSym = rhs.getAsLocSymbol()) { |

836 | // We can only build expressions with symbols on the left, |

837 | // so we need a reversible operator. |

838 | if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp) |

839 | return UnknownVal(); |

840 | |

841 | op = BinaryOperator::reverseComparisonOp(op); |

842 | return makeNonLoc(rSym, op, L.getValue(), resultTy); |

843 | } |

844 | |

845 | // If both operands are constants, just perform the operation. |

846 | if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { |

847 | assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub); |

848 | |

849 | if (const auto *ResultInt = |

850 | BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue())) |

851 | return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{}); |

852 | return UnknownVal(); |

853 | } |

854 | |

855 | // Special case comparisons against NULL. |

856 | // This must come after the test if the RHS is a symbol, which is used to |

857 | // build constraints. The address of any non-symbolic region is guaranteed |

858 | // to be non-NULL, as is any label. |

859 | assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs))); |

860 | if (lhs.isZeroConstant()) { |

861 | switch (op) { |

862 | default: |

863 | break; |

864 | case BO_EQ: |

865 | case BO_GT: |

866 | case BO_GE: |

867 | return makeTruthVal(false, resultTy); |

868 | case BO_NE: |

869 | case BO_LT: |

870 | case BO_LE: |

871 | return makeTruthVal(true, resultTy); |

872 | } |

873 | } |

874 | |

875 | // Comparing an arbitrary integer to a region or label address is |

876 | // completely unknowable. |

877 | return UnknownVal(); |

878 | } |

879 | case loc::MemRegionValKind: { |

880 | if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { |

881 | // If one of the operands is a symbol and the other is a constant, |

882 | // build an expression for use by the constraint manager. |

883 | if (SymbolRef lSym = lhs.getAsLocSymbol(true)) { |

884 | if (BinaryOperator::isComparisonOp(op)) |

885 | return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy); |

886 | return UnknownVal(); |

887 | } |

888 | // Special case comparisons to NULL. |

889 | // This must come after the test if the LHS is a symbol, which is used to |

890 | // build constraints. The address of any non-symbolic region is guaranteed |

891 | // to be non-NULL. |

892 | if (rInt->isZeroConstant()) { |

893 | if (op == BO_Sub) |

894 | return evalCast(lhs, resultTy, QualType{}); |

895 | |

896 | if (BinaryOperator::isComparisonOp(op)) { |

897 | QualType boolType = getContext().BoolTy; |

898 | NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>(); |

899 | NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>(); |

900 | return evalBinOpNN(state, op, l, r, resultTy); |

901 | } |

902 | } |

903 | |

904 | // Comparing a region to an arbitrary integer is completely unknowable. |

905 | return UnknownVal(); |

906 | } |

907 | |

908 | // Get both values as regions, if possible. |

909 | const MemRegion *LeftMR = lhs.getAsRegion(); |

910 | assert(LeftMR && "MemRegionValKind SVal doesn't have a region!"); |

911 | |

912 | const MemRegion *RightMR = rhs.getAsRegion(); |

913 | if (!RightMR) |

914 | // The RHS is probably a label, which in theory could address a region. |

915 | // FIXME: we can probably make a more useful statement about non-code |

916 | // regions, though. |

917 | return UnknownVal(); |

918 | |

919 | const MemRegion *LeftBase = LeftMR->getBaseRegion(); |

920 | const MemRegion *RightBase = RightMR->getBaseRegion(); |

921 | const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace(); |

922 | const MemSpaceRegion *RightMS = RightBase->getMemorySpace(); |

923 | const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion(); |

924 | |

925 | // If the two regions are from different known memory spaces they cannot be |

926 | // equal. Also, assume that no symbolic region (whose memory space is |

927 | // unknown) is on the stack. |

928 | if (LeftMS != RightMS && |

929 | ((LeftMS != UnknownMS && RightMS != UnknownMS) || |

930 | (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) { |

931 | switch (op) { |

932 | default: |

933 | return UnknownVal(); |

934 | case BO_EQ: |

935 | return makeTruthVal(false, resultTy); |

936 | case BO_NE: |

937 | return makeTruthVal(true, resultTy); |

938 | } |

939 | } |

940 | |

941 | // If both values wrap regions, see if they're from different base regions. |

942 | // Note, heap base symbolic regions are assumed to not alias with |

943 | // each other; for example, we assume that malloc returns different address |

944 | // on each invocation. |

945 | // FIXME: ObjC object pointers always reside on the heap, but currently |

946 | // we treat their memory space as unknown, because symbolic pointers |

947 | // to ObjC objects may alias. There should be a way to construct |

948 | // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker |

949 | // guesses memory space for ObjC object pointers manually instead of |

950 | // relying on us. |

951 | if (LeftBase != RightBase && |

952 | ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) || |

953 | (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){ |

954 | switch (op) { |

955 | default: |

956 | return UnknownVal(); |

957 | case BO_EQ: |

958 | return makeTruthVal(false, resultTy); |

959 | case BO_NE: |

960 | return makeTruthVal(true, resultTy); |

961 | } |

962 | } |

963 | |

964 | // Handle special cases for when both regions are element regions. |

965 | const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR); |

966 | const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR); |

967 | if (RightER && LeftER) { |

968 | // Next, see if the two ERs have the same super-region and matching types. |

969 | // FIXME: This should do something useful even if the types don't match, |

970 | // though if both indexes are constant the RegionRawOffset path will |

971 | // give the correct answer. |

972 | if (LeftER->getSuperRegion() == RightER->getSuperRegion() && |

973 | LeftER->getElementType() == RightER->getElementType()) { |

974 | // Get the left index and cast it to the correct type. |

975 | // If the index is unknown or undefined, bail out here. |

976 | SVal LeftIndexVal = LeftER->getIndex(); |

977 | Optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>(); |

978 | if (!LeftIndex) |

979 | return UnknownVal(); |

980 | LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{}); |

981 | LeftIndex = LeftIndexVal.getAs<NonLoc>(); |

982 | if (!LeftIndex) |

983 | return UnknownVal(); |

984 | |

985 | // Do the same for the right index. |

986 | SVal RightIndexVal = RightER->getIndex(); |

987 | Optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>(); |

988 | if (!RightIndex) |

989 | return UnknownVal(); |

990 | RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{}); |

991 | RightIndex = RightIndexVal.getAs<NonLoc>(); |

992 | if (!RightIndex) |

993 | return UnknownVal(); |

994 | |

995 | // Actually perform the operation. |

996 | // evalBinOpNN expects the two indexes to already be the right type. |

997 | return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy); |

998 | } |

999 | } |

1000 | |

1001 | // Special handling of the FieldRegions, even with symbolic offsets. |

1002 | const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR); |

1003 | const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR); |

1004 | if (RightFR && LeftFR) { |

1005 | SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy, |

1006 | *this); |

1007 | if (!R.isUnknown()) |

1008 | return R; |

1009 | } |

1010 | |

1011 | // Compare the regions using the raw offsets. |

1012 | RegionOffset LeftOffset = LeftMR->getAsOffset(); |

1013 | RegionOffset RightOffset = RightMR->getAsOffset(); |

1014 | |

1015 | if (LeftOffset.getRegion() != nullptr && |

1016 | LeftOffset.getRegion() == RightOffset.getRegion() && |

1017 | !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) { |

1018 | int64_t left = LeftOffset.getOffset(); |

1019 | int64_t right = RightOffset.getOffset(); |

1020 | |

1021 | switch (op) { |

1022 | default: |

1023 | return UnknownVal(); |

1024 | case BO_LT: |

1025 | return makeTruthVal(left < right, resultTy); |

1026 | case BO_GT: |

1027 | return makeTruthVal(left > right, resultTy); |

1028 | case BO_LE: |

1029 | return makeTruthVal(left <= right, resultTy); |

1030 | case BO_GE: |

1031 | return makeTruthVal(left >= right, resultTy); |

1032 | case BO_EQ: |

1033 | return makeTruthVal(left == right, resultTy); |

1034 | case BO_NE: |

1035 | return makeTruthVal(left != right, resultTy); |

1036 | } |

1037 | } |

1038 | |

1039 | // At this point we're not going to get a good answer, but we can try |

1040 | // conjuring an expression instead. |

1041 | SymbolRef LHSSym = lhs.getAsLocSymbol(); |

1042 | SymbolRef RHSSym = rhs.getAsLocSymbol(); |

1043 | if (LHSSym && RHSSym) |

1044 | return makeNonLoc(LHSSym, op, RHSSym, resultTy); |

1045 | |

1046 | // If we get here, we have no way of comparing the regions. |

1047 | return UnknownVal(); |

1048 | } |

1049 | } |

1050 | } |

1051 | |

1052 | SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state, |

1053 | BinaryOperator::Opcode op, Loc lhs, |

1054 | NonLoc rhs, QualType resultTy) { |

1055 | if (op >= BO_PtrMemD && op <= BO_PtrMemI) { |

1056 | if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) { |

1057 | if (PTMSV->isNullMemberPointer()) |

1058 | return UndefinedVal(); |

1059 | |

1060 | auto getFieldLValue = [&](const auto *FD) -> SVal { |

1061 | SVal Result = lhs; |

1062 | |

1063 | for (const auto &I : *PTMSV) |

1064 | Result = StateMgr.getStoreManager().evalDerivedToBase( |

1065 | Result, I->getType(), I->isVirtual()); |

1066 | |

1067 | return state->getLValue(FD, Result); |

1068 | }; |

1069 | |

1070 | if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) { |

1071 | return getFieldLValue(FD); |

1072 | } |

1073 | if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) { |

1074 | return getFieldLValue(FD); |

1075 | } |

1076 | } |

1077 | |

1078 | return rhs; |

1079 | } |

1080 | |

1081 | assert(!BinaryOperator::isComparisonOp(op) && |

1082 | "arguments to comparison ops must be of the same type"); |

1083 | |

1084 | // Special case: rhs is a zero constant. |

1085 | if (rhs.isZeroConstant()) |

1086 | return lhs; |

1087 | |

1088 | // Perserve the null pointer so that it can be found by the DerefChecker. |

1089 | if (lhs.isZeroConstant()) |

1090 | return lhs; |

1091 | |

1092 | // We are dealing with pointer arithmetic. |

1093 | |

1094 | // Handle pointer arithmetic on constant values. |

1095 | if (Optional<nonloc::ConcreteInt> rhsInt = rhs.getAs<nonloc::ConcreteInt>()) { |

1096 | if (Optional<loc::ConcreteInt> lhsInt = lhs.getAs<loc::ConcreteInt>()) { |

1097 | const llvm::APSInt &leftI = lhsInt->getValue(); |

1098 | assert(leftI.isUnsigned()); |

1099 | llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true); |

1100 | |

1101 | // Convert the bitwidth of rightI. This should deal with overflow |

1102 | // since we are dealing with concrete values. |

1103 | rightI = rightI.extOrTrunc(leftI.getBitWidth()); |

1104 | |

1105 | // Offset the increment by the pointer size. |

1106 | llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true); |

1107 | QualType pointeeType = resultTy->getPointeeType(); |

1108 | Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity(); |

1109 | rightI *= Multiplicand; |

1110 | |

1111 | // Compute the adjusted pointer. |

1112 | switch (op) { |

1113 | case BO_Add: |

1114 | rightI = leftI + rightI; |

1115 | break; |

1116 | case BO_Sub: |

1117 | rightI = leftI - rightI; |

1118 | break; |

1119 | default: |

1120 | llvm_unreachable("Invalid pointer arithmetic operation"); |

1121 | } |

1122 | return loc::ConcreteInt(getBasicValueFactory().getValue(rightI)); |

1123 | } |

1124 | } |

1125 | |

1126 | // Handle cases where 'lhs' is a region. |

1127 | if (const MemRegion *region = lhs.getAsRegion()) { |

1128 | rhs = convertToArrayIndex(rhs).castAs<NonLoc>(); |

1129 | SVal index = UnknownVal(); |

1130 | const SubRegion *superR = nullptr; |

1131 | // We need to know the type of the pointer in order to add an integer to it. |

1132 | // Depending on the type, different amount of bytes is added. |

1133 | QualType elementType; |

1134 | |

1135 | if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) { |

1136 | assert(op == BO_Add || op == BO_Sub); |

1137 | index = evalBinOpNN(state, op, elemReg->getIndex(), rhs, |

1138 | getArrayIndexType()); |

1139 | superR = cast<SubRegion>(elemReg->getSuperRegion()); |

1140 | elementType = elemReg->getElementType(); |

1141 | } |

1142 | else if (isa<SubRegion>(region)) { |

1143 | assert(op == BO_Add || op == BO_Sub); |

1144 | index = (op == BO_Add) ? rhs : evalMinus(rhs); |

1145 | superR = cast<SubRegion>(region); |

1146 | // TODO: Is this actually reliable? Maybe improving our MemRegion |

1147 | // hierarchy to provide typed regions for all non-void pointers would be |

1148 | // better. For instance, we cannot extend this towards LocAsInteger |

1149 | // operations, where result type of the expression is integer. |

1150 | if (resultTy->isAnyPointerType()) |

1151 | elementType = resultTy->getPointeeType(); |

1152 | } |

1153 | |

1154 | // Represent arithmetic on void pointers as arithmetic on char pointers. |

1155 | // It is fine when a TypedValueRegion of char value type represents |

1156 | // a void pointer. Note that arithmetic on void pointers is a GCC extension. |

1157 | if (elementType->isVoidType()) |

1158 | elementType = getContext().CharTy; |

1159 | |

1160 | if (Optional<NonLoc> indexV = index.getAs<NonLoc>()) { |

1161 | return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV, |

1162 | superR, getContext())); |

1163 | } |

1164 | } |

1165 | return UnknownVal(); |

1166 | } |

1167 | |

1168 | const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state, |

1169 | SVal V) { |

1170 | if (V.isUnknownOrUndef()) |

1171 | return nullptr; |

1172 | |

1173 | if (Optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>()) |

1174 | return &X->getValue(); |

1175 | |

1176 | if (Optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>()) |

1177 | return &X->getValue(); |

1178 | |

1179 | if (SymbolRef Sym = V.getAsSymbol()) |

1180 | return state->getConstraintManager().getSymVal(state, Sym); |

1181 | |

1182 | return nullptr; |

1183 | } |

1184 | |

1185 | const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, |

1186 | SVal V) { |

1187 | return getConstValue(state, simplifySVal(state, V)); |

1188 | } |

1189 | |

1190 | SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) { |

1191 | SVal SimplifiedVal = simplifySValOnce(State, Val); |

1192 | while (SimplifiedVal != Val) { |

1193 | Val = SimplifiedVal; |

1194 | SimplifiedVal = simplifySValOnce(State, Val); |

1195 | } |

1196 | return SimplifiedVal; |

1197 | } |

1198 | |

1199 | SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) { |

1200 | return simplifyUntilFixpoint(State, V); |

1201 | } |

1202 | |

1203 | SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) { |

1204 | // For now, this function tries to constant-fold symbols inside a |

1205 | // nonloc::SymbolVal, and does nothing else. More simplifications should |

1206 | // be possible, such as constant-folding an index in an ElementRegion. |

1207 | |

1208 | class Simplifier : public FullSValVisitor<Simplifier, SVal> { |

1209 | ProgramStateRef State; |

1210 | SValBuilder &SVB; |

1211 | |

1212 | // Cache results for the lifetime of the Simplifier. Results change every |

1213 | // time new constraints are added to the program state, which is the whole |

1214 | // point of simplifying, and for that very reason it's pointless to maintain |

1215 | // the same cache for the duration of the whole analysis. |

1216 | llvm::DenseMap<SymbolRef, SVal> Cached; |

1217 | |

1218 | static bool isUnchanged(SymbolRef Sym, SVal Val) { |

1219 | return Sym == Val.getAsSymbol(); |

1220 | } |

1221 | |

1222 | SVal cache(SymbolRef Sym, SVal V) { |

1223 | Cached[Sym] = V; |

1224 | return V; |

1225 | } |

1226 | |

1227 | SVal skip(SymbolRef Sym) { |

1228 | return cache(Sym, SVB.makeSymbolVal(Sym)); |

1229 | } |

1230 | |

1231 | // Return the known const value for the Sym if available, or return Undef |

1232 | // otherwise. |

1233 | SVal getConst(SymbolRef Sym) { |

1234 | const llvm::APSInt *Const = |

1235 | State->getConstraintManager().getSymVal(State, Sym); |

1236 | if (Const) |

1237 | return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const) |

1238 | : (SVal)SVB.makeIntVal(*Const); |

1239 | return UndefinedVal(); |

1240 | } |

1241 | |

1242 | SVal getConstOrVisit(SymbolRef Sym) { |

1243 | const SVal Ret = getConst(Sym); |

1244 | if (Ret.isUndef()) |

1245 | return Visit(Sym); |

1246 | return Ret; |

1247 | } |

1248 | |

1249 | public: |

1250 | Simplifier(ProgramStateRef State) |

1251 | : State(State), SVB(State->getStateManager().getSValBuilder()) {} |

1252 | |

1253 | SVal VisitSymbolData(const SymbolData *S) { |

1254 | // No cache here. |

1255 | if (const llvm::APSInt *I = |

1256 | State->getConstraintManager().getSymVal(State, S)) |

1257 | return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I) |

1258 | : (SVal)SVB.makeIntVal(*I); |

1259 | return SVB.makeSymbolVal(S); |

1260 | } |

1261 | |

1262 | SVal VisitSymIntExpr(const SymIntExpr *S) { |

1263 | auto I = Cached.find(S); |

1264 | if (I != Cached.end()) |

1265 | return I->second; |

1266 | |

1267 | SVal LHS = getConstOrVisit(S->getLHS()); |

1268 | if (isUnchanged(S->getLHS(), LHS)) |

1269 | return skip(S); |

1270 | |

1271 | SVal RHS; |

1272 | // By looking at the APSInt in the right-hand side of S, we cannot |

1273 | // figure out if it should be treated as a Loc or as a NonLoc. |

1274 | // So make our guess by recalling that we cannot multiply pointers |

1275 | // or compare a pointer to an integer. |

1276 | if (Loc::isLocType(S->getLHS()->getType()) && |

1277 | BinaryOperator::isComparisonOp(S->getOpcode())) { |

1278 | // The usual conversion of $sym to &SymRegion{$sym}, as they have |

1279 | // the same meaning for Loc-type symbols, but the latter form |

1280 | // is preferred in SVal computations for being Loc itself. |

1281 | if (SymbolRef Sym = LHS.getAsSymbol()) { |

1282 | assert(Loc::isLocType(Sym->getType())); |

1283 | LHS = SVB.makeLoc(Sym); |

1284 | } |

1285 | RHS = SVB.makeIntLocVal(S->getRHS()); |

1286 | } else { |

1287 | RHS = SVB.makeIntVal(S->getRHS()); |

1288 | } |

1289 | |

1290 | return cache( |

1291 | S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); |

1292 | } |

1293 | |

1294 | SVal VisitIntSymExpr(const IntSymExpr *S) { |

1295 | auto I = Cached.find(S); |

1296 | if (I != Cached.end()) |

1297 | return I->second; |

1298 | |

1299 | SVal RHS = getConstOrVisit(S->getRHS()); |

1300 | if (isUnchanged(S->getRHS(), RHS)) |

1301 | return skip(S); |

1302 | |

1303 | SVal LHS = SVB.makeIntVal(S->getLHS()); |

1304 | return cache( |

1305 | S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); |

1306 | } |

1307 | |

1308 | SVal VisitSymSymExpr(const SymSymExpr *S) { |

1309 | auto I = Cached.find(S); |

1310 | if (I != Cached.end()) |

1311 | return I->second; |

1312 | |

1313 | // For now don't try to simplify mixed Loc/NonLoc expressions |

1314 | // because they often appear from LocAsInteger operations |

1315 | // and we don't know how to combine a LocAsInteger |

1316 | // with a concrete value. |

1317 | if (Loc::isLocType(S->getLHS()->getType()) != |

1318 | Loc::isLocType(S->getRHS()->getType())) |

1319 | return skip(S); |

1320 | |

1321 | SVal LHS = getConstOrVisit(S->getLHS()); |

1322 | SVal RHS = getConstOrVisit(S->getRHS()); |

1323 | |

1324 | if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS)) |

1325 | return skip(S); |

1326 | |

1327 | return cache( |

1328 | S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); |

1329 | } |

1330 | |

1331 | SVal VisitSymbolCast(const SymbolCast *S) { |

1332 | auto I = Cached.find(S); |

1333 | if (I != Cached.end()) |

1334 | return I->second; |

1335 | const SymExpr *OpSym = S->getOperand(); |

1336 | SVal OpVal = getConstOrVisit(OpSym); |

1337 | if (isUnchanged(OpSym, OpVal)) |

1338 | return skip(S); |

1339 | |

1340 | return cache(S, SVB.evalCast(OpVal, S->getType(), OpSym->getType())); |

1341 | } |

1342 | |

1343 | SVal VisitUnarySymExpr(const UnarySymExpr *S) { |

1344 | auto I = Cached.find(S); |

1345 | if (I != Cached.end()) |

1346 | return I->second; |

1347 | SVal Op = getConstOrVisit(S->getOperand()); |

1348 | if (isUnchanged(S->getOperand(), Op)) |

1349 | return skip(S); |

1350 | |

1351 | return cache( |

1352 | S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType())); |

1353 | } |

1354 | |

1355 | SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); } |

1356 | |

1357 | SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); } |

1358 | |

1359 | SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) { |

1360 | // Simplification is much more costly than computing complexity. |

1361 | // For high complexity, it may be not worth it. |

1362 | return Visit(V.getSymbol()); |

1363 | } |

1364 | |

1365 | SVal VisitSVal(SVal V) { return V; } |

1366 | }; |

1367 | |

1368 | SVal SimplifiedV = Simplifier(State).Visit(V); |

1369 | return SimplifiedV; |

1370 | } |

1371 |