| 1 | //===- bolt/Passes/PAuthGadgetScanner.cpp ---------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements a pass that looks for any AArch64 return instructions |
| 10 | // that may not be protected by PAuth authentication instructions when needed. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "bolt/Passes/PAuthGadgetScanner.h" |
| 15 | #include "bolt/Core/ParallelUtilities.h" |
| 16 | #include "bolt/Passes/DataflowAnalysis.h" |
| 17 | #include "llvm/ADT/STLExtras.h" |
| 18 | #include "llvm/ADT/SmallSet.h" |
| 19 | #include "llvm/MC/MCInst.h" |
| 20 | #include "llvm/Support/Format.h" |
| 21 | #include <memory> |
| 22 | |
| 23 | #define DEBUG_TYPE "bolt-pauth-scanner" |
| 24 | |
| 25 | namespace llvm { |
| 26 | namespace bolt { |
| 27 | |
| 28 | raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) { |
| 29 | OS << "MCInstBBRef<" ; |
| 30 | if (Ref.BB == nullptr) |
| 31 | OS << "BB:(null)" ; |
| 32 | else |
| 33 | OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex; |
| 34 | OS << ">" ; |
| 35 | return OS; |
| 36 | } |
| 37 | |
| 38 | raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) { |
| 39 | OS << "MCInstBFRef<" ; |
| 40 | if (Ref.BF == nullptr) |
| 41 | OS << "BF:(null)" ; |
| 42 | else |
| 43 | OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset(); |
| 44 | OS << ">" ; |
| 45 | return OS; |
| 46 | } |
| 47 | |
| 48 | raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) { |
| 49 | switch (Ref.ParentKind) { |
| 50 | case MCInstReference::BasicBlockParent: |
| 51 | OS << Ref.U.BBRef; |
| 52 | return OS; |
| 53 | case MCInstReference::FunctionParent: |
| 54 | OS << Ref.U.BFRef; |
| 55 | return OS; |
| 56 | } |
| 57 | llvm_unreachable("" ); |
| 58 | } |
| 59 | |
| 60 | namespace PAuthGadgetScanner { |
| 61 | |
| 62 | [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label, |
| 63 | const MCInst &MI) { |
| 64 | dbgs() << " " << Label << ": " ; |
| 65 | BC.printInstruction(OS&: dbgs(), Instruction: MI); |
| 66 | } |
| 67 | |
| 68 | [[maybe_unused]] static void traceReg(const BinaryContext &BC, StringRef Label, |
| 69 | MCPhysReg Reg) { |
| 70 | dbgs() << " " << Label << ": " ; |
| 71 | if (Reg == BC.MIB->getNoRegister()) |
| 72 | dbgs() << "(none)" ; |
| 73 | else |
| 74 | dbgs() << BC.MRI->getName(RegNo: Reg); |
| 75 | dbgs() << "\n" ; |
| 76 | } |
| 77 | |
| 78 | [[maybe_unused]] static void traceRegMask(const BinaryContext &BC, |
| 79 | StringRef Label, BitVector Mask) { |
| 80 | dbgs() << " " << Label << ": " ; |
| 81 | RegStatePrinter(BC).print(OS&: dbgs(), State: Mask); |
| 82 | dbgs() << "\n" ; |
| 83 | } |
| 84 | |
| 85 | // This class represents mapping from a set of arbitrary physical registers to |
| 86 | // consecutive array indexes. |
| 87 | class TrackedRegisters { |
| 88 | static constexpr uint16_t NoIndex = -1; |
| 89 | const std::vector<MCPhysReg> Registers; |
| 90 | std::vector<uint16_t> RegToIndexMapping; |
| 91 | |
| 92 | static size_t getMappingSize(ArrayRef<MCPhysReg> RegsToTrack) { |
| 93 | if (RegsToTrack.empty()) |
| 94 | return 0; |
| 95 | return 1 + *llvm::max_element(Range&: RegsToTrack); |
| 96 | } |
| 97 | |
| 98 | public: |
| 99 | TrackedRegisters(ArrayRef<MCPhysReg> RegsToTrack) |
| 100 | : Registers(RegsToTrack), |
| 101 | RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) { |
| 102 | for (unsigned I = 0; I < RegsToTrack.size(); ++I) |
| 103 | RegToIndexMapping[RegsToTrack[I]] = I; |
| 104 | } |
| 105 | |
| 106 | ArrayRef<MCPhysReg> getRegisters() const { return Registers; } |
| 107 | |
| 108 | size_t getNumTrackedRegisters() const { return Registers.size(); } |
| 109 | |
| 110 | bool empty() const { return Registers.empty(); } |
| 111 | |
| 112 | bool isTracked(MCPhysReg Reg) const { |
| 113 | bool IsTracked = (unsigned)Reg < RegToIndexMapping.size() && |
| 114 | RegToIndexMapping[Reg] != NoIndex; |
| 115 | assert(IsTracked == llvm::is_contained(Registers, Reg)); |
| 116 | return IsTracked; |
| 117 | } |
| 118 | |
| 119 | unsigned getIndex(MCPhysReg Reg) const { |
| 120 | assert(isTracked(Reg) && "Register is not tracked" ); |
| 121 | return RegToIndexMapping[Reg]; |
| 122 | } |
| 123 | }; |
| 124 | |
// The security property that is checked is:
// When a register is used as the address to jump to in a return instruction,
// that register must be safe-to-dereference. It must either
// (a) be safe-to-dereference at function entry and never be changed within this
//     function, i.e. have the same value as when the function started, or
// (b) have been last written by an authentication instruction.

// This property is checked by using dataflow analysis to keep track of which
// registers have been written (def-ed) since they were last authenticated. For
// pac-ret, any return instruction using a register which is not
// safe-to-dereference is a gadget to be reported. For PAuthABI, probably at
// least any indirect control flow using such a register should be reported.

// Furthermore, when producing a diagnostic for a found non-pac-ret protected
// return, the analysis also lists the last instructions that wrote to the
// register used in the return instruction.
// The total set of registers used in return instructions in a given function is
// small. It almost always is just `X30`.
// In order to reduce the memory consumption of storing this additional state
// during the dataflow analysis, this is computed by running the dataflow
// analysis twice:
// 1. In the first run, the dataflow analysis only keeps track of the security
//    property: i.e. which registers have been overwritten since the last
//    time they've been authenticated.
// 2. If the first run finds any return instructions using a register last
//    written by a non-authenticating instruction, the dataflow analysis will
//    be run a second time. The first run will return which registers are used
//    in the gadgets to be reported. This information is used in the second run
//    to also track which instructions last wrote to those registers.
| 154 | |
| 155 | /// A state representing which registers are safe to use by an instruction |
| 156 | /// at a given program point. |
| 157 | /// |
| 158 | /// To simplify reasoning, let's stick with the following approach: |
| 159 | /// * when state is updated by the data-flow analysis, the sub-, super- and |
| 160 | /// overlapping registers are marked as needed |
| 161 | /// * when the particular instruction is checked if it represents a gadget, |
| 162 | /// the specific bit of BitVector should be usable to answer this. |
| 163 | /// |
| 164 | /// For example, on AArch64: |
| 165 | /// * An AUTIZA X0 instruction marks both X0 and W0 (as well as W0_HI) as |
| 166 | /// safe-to-dereference. It does not change the state of X0_X1, for example, |
| 167 | /// as super-registers partially retain their old, unsafe values. |
| 168 | /// * LDR X1, [X0] marks as unsafe both X1 itself and anything it overlaps |
| 169 | /// with: W1, W1_HI, X0_X1 and so on. |
| 170 | /// * RET (which is implicitly RET X30) is a protected return if and only if |
| 171 | /// X30 is safe-to-dereference - the state computed for sub- and |
| 172 | /// super-registers is not inspected. |
| 173 | struct SrcState { |
| 174 | /// A BitVector containing the registers that are either authenticated |
| 175 | /// (assuming failed authentication is permitted to produce an invalid |
| 176 | /// address, provided it generates an error on memory access) or whose |
| 177 | /// value is known not to be attacker-controlled under Pointer Authentication |
| 178 | /// threat model. The registers in this set are either |
| 179 | /// * not clobbered since being authenticated, or |
| 180 | /// * trusted at function entry and were not clobbered yet, or |
| 181 | /// * contain a safely materialized address. |
| 182 | BitVector SafeToDerefRegs; |
| 183 | /// A BitVector containing the registers that are either authenticated |
| 184 | /// *successfully* or whose value is known not to be attacker-controlled |
| 185 | /// under Pointer Authentication threat model. |
| 186 | /// The registers in this set are either |
| 187 | /// * authenticated and then checked to be authenticated successfully |
| 188 | /// (and not clobbered since then), or |
| 189 | /// * trusted at function entry and were not clobbered yet, or |
| 190 | /// * contain a safely materialized address. |
| 191 | BitVector TrustedRegs; |
| 192 | /// A vector of sets, only used in the second data flow run. |
| 193 | /// Each element in the vector represents one of the registers for which we |
| 194 | /// track the set of last instructions that wrote to this register. For |
| 195 | /// pac-ret analysis, the expectation is that almost all return instructions |
| 196 | /// only use register `X30`, and therefore, this vector will probably have |
| 197 | /// length 1 in the second run. |
| 198 | std::vector<SmallPtrSet<const MCInst *, 4>> LastInstWritingReg; |
| 199 | |
| 200 | /// Construct an empty state. |
| 201 | SrcState() {} |
| 202 | |
| 203 | SrcState(unsigned NumRegs, unsigned NumRegsToTrack) |
| 204 | : SafeToDerefRegs(NumRegs), TrustedRegs(NumRegs), |
| 205 | LastInstWritingReg(NumRegsToTrack) {} |
| 206 | |
| 207 | SrcState &merge(const SrcState &StateIn) { |
| 208 | if (StateIn.empty()) |
| 209 | return *this; |
| 210 | if (empty()) |
| 211 | return (*this = StateIn); |
| 212 | |
| 213 | SafeToDerefRegs &= StateIn.SafeToDerefRegs; |
| 214 | TrustedRegs &= StateIn.TrustedRegs; |
| 215 | for (unsigned I = 0; I < LastInstWritingReg.size(); ++I) |
| 216 | for (const MCInst *J : StateIn.LastInstWritingReg[I]) |
| 217 | LastInstWritingReg[I].insert(Ptr: J); |
| 218 | return *this; |
| 219 | } |
| 220 | |
| 221 | /// Returns true if this object does not store state of any registers - |
| 222 | /// neither safe, nor unsafe ones. |
| 223 | bool empty() const { return SafeToDerefRegs.empty(); } |
| 224 | |
| 225 | bool operator==(const SrcState &RHS) const { |
| 226 | return SafeToDerefRegs == RHS.SafeToDerefRegs && |
| 227 | TrustedRegs == RHS.TrustedRegs && |
| 228 | LastInstWritingReg == RHS.LastInstWritingReg; |
| 229 | } |
| 230 | bool operator!=(const SrcState &RHS) const { return !((*this) == RHS); } |
| 231 | }; |
| 232 | |
| 233 | static void |
| 234 | printLastInsts(raw_ostream &OS, |
| 235 | ArrayRef<SmallPtrSet<const MCInst *, 4>> LastInstWritingReg) { |
| 236 | OS << "Insts: " ; |
| 237 | for (unsigned I = 0; I < LastInstWritingReg.size(); ++I) { |
| 238 | auto &Set = LastInstWritingReg[I]; |
| 239 | OS << "[" << I << "](" ; |
| 240 | for (const MCInst *MCInstP : Set) |
| 241 | OS << MCInstP << " " ; |
| 242 | OS << ")" ; |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | raw_ostream &operator<<(raw_ostream &OS, const SrcState &S) { |
| 247 | OS << "src-state<" ; |
| 248 | if (S.empty()) { |
| 249 | OS << "empty" ; |
| 250 | } else { |
| 251 | OS << "SafeToDerefRegs: " << S.SafeToDerefRegs << ", " ; |
| 252 | OS << "TrustedRegs: " << S.TrustedRegs << ", " ; |
| 253 | printLastInsts(OS, LastInstWritingReg: S.LastInstWritingReg); |
| 254 | } |
| 255 | OS << ">" ; |
| 256 | return OS; |
| 257 | } |
| 258 | |
| 259 | class SrcStatePrinter { |
| 260 | public: |
| 261 | void print(raw_ostream &OS, const SrcState &State) const; |
| 262 | explicit SrcStatePrinter(const BinaryContext &BC) : BC(BC) {} |
| 263 | |
| 264 | private: |
| 265 | const BinaryContext &BC; |
| 266 | }; |
| 267 | |
| 268 | void SrcStatePrinter::print(raw_ostream &OS, const SrcState &S) const { |
| 269 | RegStatePrinter RegStatePrinter(BC); |
| 270 | OS << "src-state<" ; |
| 271 | if (S.empty()) { |
| 272 | assert(S.SafeToDerefRegs.empty()); |
| 273 | assert(S.TrustedRegs.empty()); |
| 274 | assert(S.LastInstWritingReg.empty()); |
| 275 | OS << "empty" ; |
| 276 | } else { |
| 277 | OS << "SafeToDerefRegs: " ; |
| 278 | RegStatePrinter.print(OS, State: S.SafeToDerefRegs); |
| 279 | OS << ", TrustedRegs: " ; |
| 280 | RegStatePrinter.print(OS, State: S.TrustedRegs); |
| 281 | OS << ", " ; |
| 282 | printLastInsts(OS, LastInstWritingReg: S.LastInstWritingReg); |
| 283 | } |
| 284 | OS << ">" ; |
| 285 | } |
| 286 | |
| 287 | /// Computes which registers are safe to be used by control flow and signing |
| 288 | /// instructions. |
| 289 | /// |
| 290 | /// This is the base class for two implementations: a dataflow-based analysis |
| 291 | /// which is intended to be used for most functions and a simplified CFG-unaware |
| 292 | /// version for functions without reconstructed CFG. |
| 293 | class SrcSafetyAnalysis { |
| 294 | public: |
| 295 | SrcSafetyAnalysis(BinaryFunction &BF, ArrayRef<MCPhysReg> RegsToTrackInstsFor) |
| 296 | : BC(BF.getBinaryContext()), NumRegs(BC.MRI->getNumRegs()), |
| 297 | RegsToTrackInstsFor(RegsToTrackInstsFor) {} |
| 298 | |
| 299 | virtual ~SrcSafetyAnalysis() {} |
| 300 | |
| 301 | static std::shared_ptr<SrcSafetyAnalysis> |
| 302 | create(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, |
| 303 | ArrayRef<MCPhysReg> RegsToTrackInstsFor); |
| 304 | |
| 305 | virtual void run() = 0; |
| 306 | virtual const SrcState &getStateBefore(const MCInst &Inst) const = 0; |
| 307 | |
| 308 | protected: |
| 309 | BinaryContext &BC; |
| 310 | const unsigned NumRegs; |
| 311 | /// RegToTrackInstsFor is the set of registers for which the dataflow analysis |
| 312 | /// must compute which the last set of instructions writing to it are. |
| 313 | const TrackedRegisters RegsToTrackInstsFor; |
| 314 | /// Stores information about the detected instruction sequences emitted to |
| 315 | /// check an authenticated pointer. Specifically, if such sequence is detected |
| 316 | /// in a basic block, it maps the last instruction of that basic block to |
| 317 | /// (CheckedRegister, FirstInstOfTheSequence) pair, see the description of |
| 318 | /// MCPlusBuilder::getAuthCheckedReg(BB) method. |
| 319 | /// |
| 320 | /// As the detection of such sequences requires iterating over the adjacent |
| 321 | /// instructions, it should be done before calling computeNext(), which |
| 322 | /// operates on separate instructions. |
| 323 | DenseMap<const MCInst *, std::pair<MCPhysReg, const MCInst *>> |
| 324 | CheckerSequenceInfo; |
| 325 | |
| 326 | SmallPtrSet<const MCInst *, 4> &lastWritingInsts(SrcState &S, |
| 327 | MCPhysReg Reg) const { |
| 328 | unsigned Index = RegsToTrackInstsFor.getIndex(Reg); |
| 329 | return S.LastInstWritingReg[Index]; |
| 330 | } |
| 331 | const SmallPtrSet<const MCInst *, 4> &lastWritingInsts(const SrcState &S, |
| 332 | MCPhysReg Reg) const { |
| 333 | unsigned Index = RegsToTrackInstsFor.getIndex(Reg); |
| 334 | return S.LastInstWritingReg[Index]; |
| 335 | } |
| 336 | |
| 337 | SrcState createEntryState() { |
| 338 | SrcState S(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters()); |
| 339 | for (MCPhysReg Reg : BC.MIB->getTrustedLiveInRegs()) |
| 340 | S.TrustedRegs |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/true); |
| 341 | S.SafeToDerefRegs = S.TrustedRegs; |
| 342 | return S; |
| 343 | } |
| 344 | |
| 345 | BitVector getClobberedRegs(const MCInst &Point) const { |
| 346 | BitVector Clobbered(NumRegs); |
| 347 | // Assume a call can clobber all registers, including callee-saved |
| 348 | // registers. There's a good chance that callee-saved registers will be |
| 349 | // saved on the stack at some point during execution of the callee. |
| 350 | // Therefore they should also be considered as potentially modified by an |
| 351 | // attacker/written to. |
| 352 | // Also, not all functions may respect the AAPCS ABI rules about |
| 353 | // caller/callee-saved registers. |
| 354 | if (BC.MIB->isCall(Inst: Point)) |
| 355 | Clobbered.set(); |
| 356 | else |
| 357 | BC.MIB->getClobberedRegs(Inst: Point, Regs&: Clobbered); |
| 358 | return Clobbered; |
| 359 | } |
| 360 | |
| 361 | // Returns all registers that can be treated as if they are written by an |
| 362 | // authentication instruction. |
| 363 | SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point, |
| 364 | const SrcState &Cur) const { |
| 365 | SmallVector<MCPhysReg> Regs; |
| 366 | |
| 367 | // A signed pointer can be authenticated, or |
| 368 | bool Dummy = false; |
| 369 | if (auto AutReg = BC.MIB->getWrittenAuthenticatedReg(Inst: Point, IsChecked&: Dummy)) |
| 370 | Regs.push_back(Elt: *AutReg); |
| 371 | |
| 372 | // ... a safe address can be materialized, or |
| 373 | if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Inst: Point)) |
| 374 | Regs.push_back(Elt: *NewAddrReg); |
| 375 | |
| 376 | // ... an address can be updated in a safe manner, producing the result |
| 377 | // which is as trusted as the input address. |
| 378 | if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst: Point)) { |
| 379 | if (Cur.SafeToDerefRegs[DstAndSrc->second]) |
| 380 | Regs.push_back(Elt: DstAndSrc->first); |
| 381 | } |
| 382 | |
| 383 | return Regs; |
| 384 | } |
| 385 | |
| 386 | // Returns all registers made trusted by this instruction. |
| 387 | SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point, |
| 388 | const SrcState &Cur) const { |
| 389 | SmallVector<MCPhysReg> Regs; |
| 390 | |
| 391 | // An authenticated pointer can be checked, or |
| 392 | std::optional<MCPhysReg> CheckedReg = |
| 393 | BC.MIB->getAuthCheckedReg(Inst: Point, /*MayOverwrite=*/false); |
| 394 | if (CheckedReg && Cur.SafeToDerefRegs[*CheckedReg]) |
| 395 | Regs.push_back(Elt: *CheckedReg); |
| 396 | |
| 397 | // ... a pointer can be authenticated by an instruction that always checks |
| 398 | // the pointer, or |
| 399 | bool IsChecked = false; |
| 400 | std::optional<MCPhysReg> AutReg = |
| 401 | BC.MIB->getWrittenAuthenticatedReg(Inst: Point, IsChecked); |
| 402 | if (AutReg && IsChecked) |
| 403 | Regs.push_back(Elt: *AutReg); |
| 404 | |
| 405 | if (CheckerSequenceInfo.contains(Val: &Point)) { |
| 406 | MCPhysReg CheckedReg; |
| 407 | const MCInst *FirstCheckerInst; |
| 408 | std::tie(args&: CheckedReg, args&: FirstCheckerInst) = CheckerSequenceInfo.at(Val: &Point); |
| 409 | |
| 410 | // FirstCheckerInst should belong to the same basic block (see the |
| 411 | // assertion in DataflowSrcSafetyAnalysis::run()), meaning it was |
| 412 | // deterministically processed a few steps before this instruction. |
| 413 | const SrcState &StateBeforeChecker = getStateBefore(Inst: *FirstCheckerInst); |
| 414 | if (StateBeforeChecker.SafeToDerefRegs[CheckedReg]) |
| 415 | Regs.push_back(Elt: CheckedReg); |
| 416 | } |
| 417 | |
| 418 | // ... a safe address can be materialized, or |
| 419 | if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Inst: Point)) |
| 420 | Regs.push_back(Elt: *NewAddrReg); |
| 421 | |
| 422 | // ... an address can be updated in a safe manner, producing the result |
| 423 | // which is as trusted as the input address. |
| 424 | if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst: Point)) { |
| 425 | if (Cur.TrustedRegs[DstAndSrc->second]) |
| 426 | Regs.push_back(Elt: DstAndSrc->first); |
| 427 | } |
| 428 | |
| 429 | return Regs; |
| 430 | } |
| 431 | |
| 432 | SrcState computeNext(const MCInst &Point, const SrcState &Cur) { |
| 433 | SrcStatePrinter P(BC); |
| 434 | LLVM_DEBUG({ |
| 435 | dbgs() << " SrcSafetyAnalysis::ComputeNext(" ; |
| 436 | BC.InstPrinter->printInst(&const_cast<MCInst &>(Point), 0, "" , *BC.STI, |
| 437 | dbgs()); |
| 438 | dbgs() << ", " ; |
| 439 | P.print(dbgs(), Cur); |
| 440 | dbgs() << ")\n" ; |
| 441 | }); |
| 442 | |
| 443 | // If this instruction is reachable, a non-empty state will be propagated |
| 444 | // to it from the entry basic block sooner or later. Until then, it is both |
| 445 | // more efficient and easier to reason about to skip computeNext(). |
| 446 | if (Cur.empty()) { |
| 447 | LLVM_DEBUG( |
| 448 | { dbgs() << "Skipping computeNext(Point, Cur) as Cur is empty.\n" ; }); |
| 449 | return SrcState(); |
| 450 | } |
| 451 | |
| 452 | // First, compute various properties of the instruction, taking the state |
| 453 | // before its execution into account, if necessary. |
| 454 | |
| 455 | BitVector Clobbered = getClobberedRegs(Point); |
| 456 | SmallVector<MCPhysReg> NewSafeToDerefRegs = |
| 457 | getRegsMadeSafeToDeref(Point, Cur); |
| 458 | SmallVector<MCPhysReg> NewTrustedRegs = getRegsMadeTrusted(Point, Cur); |
| 459 | |
| 460 | // Ideally, being trusted is a strictly stronger property than being |
| 461 | // safe-to-dereference. To simplify the computation of Next state, enforce |
| 462 | // this for NewSafeToDerefRegs and NewTrustedRegs. Additionally, this |
| 463 | // fixes the properly for "cumulative" register states in tricky cases |
| 464 | // like the following: |
| 465 | // |
| 466 | // ; LR is safe to dereference here |
| 467 | // mov x16, x30 ; start of the sequence, LR is s-t-d right before |
| 468 | // xpaclri ; clobbers LR, LR is not safe anymore |
| 469 | // cmp x30, x16 |
| 470 | // b.eq 1f ; end of the sequence: LR is marked as trusted |
| 471 | // brk 0x1234 |
| 472 | // 1: |
| 473 | // ; at this point LR would be marked as trusted, |
| 474 | // ; but not safe-to-dereference |
| 475 | // |
| 476 | for (auto TrustedReg : NewTrustedRegs) { |
| 477 | if (!is_contained(Range&: NewSafeToDerefRegs, Element: TrustedReg)) |
| 478 | NewSafeToDerefRegs.push_back(Elt: TrustedReg); |
| 479 | } |
| 480 | |
| 481 | // Then, compute the state after this instruction is executed. |
| 482 | SrcState Next = Cur; |
| 483 | |
| 484 | Next.SafeToDerefRegs.reset(RHS: Clobbered); |
| 485 | Next.TrustedRegs.reset(RHS: Clobbered); |
| 486 | // Keep track of this instruction if it writes to any of the registers we |
| 487 | // need to track that for: |
| 488 | for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters()) |
| 489 | if (Clobbered[Reg]) |
| 490 | lastWritingInsts(S&: Next, Reg) = {&Point}; |
| 491 | |
| 492 | // After accounting for clobbered registers in general, override the state |
| 493 | // according to authentication and other *special cases* of clobbering. |
| 494 | |
| 495 | // The sub-registers are also safe-to-dereference now, but not their |
| 496 | // super-registers (as they retain untrusted register units). |
| 497 | BitVector NewSafeSubregs(NumRegs); |
| 498 | for (MCPhysReg SafeReg : NewSafeToDerefRegs) |
| 499 | NewSafeSubregs |= BC.MIB->getAliases(Reg: SafeReg, /*OnlySmaller=*/true); |
| 500 | for (MCPhysReg Reg : NewSafeSubregs.set_bits()) { |
| 501 | Next.SafeToDerefRegs.set(Reg); |
| 502 | if (RegsToTrackInstsFor.isTracked(Reg)) |
| 503 | lastWritingInsts(S&: Next, Reg).clear(); |
| 504 | } |
| 505 | |
| 506 | // Process new trusted registers. |
| 507 | for (MCPhysReg TrustedReg : NewTrustedRegs) |
| 508 | Next.TrustedRegs |= BC.MIB->getAliases(Reg: TrustedReg, /*OnlySmaller=*/true); |
| 509 | |
| 510 | LLVM_DEBUG({ |
| 511 | dbgs() << " .. result: (" ; |
| 512 | P.print(dbgs(), Next); |
| 513 | dbgs() << ")\n" ; |
| 514 | }); |
| 515 | |
| 516 | return Next; |
| 517 | } |
| 518 | |
| 519 | public: |
| 520 | std::vector<MCInstReference> |
| 521 | getLastClobberingInsts(const MCInst &Inst, BinaryFunction &BF, |
| 522 | MCPhysReg ClobberedReg) const { |
| 523 | const SrcState &S = getStateBefore(Inst); |
| 524 | |
| 525 | std::vector<MCInstReference> Result; |
| 526 | for (const MCInst *Inst : lastWritingInsts(S, Reg: ClobberedReg)) { |
| 527 | MCInstReference Ref = MCInstReference::get(Inst, BF); |
| 528 | assert(Ref && "Expected Inst to be found" ); |
| 529 | Result.push_back(x: Ref); |
| 530 | } |
| 531 | return Result; |
| 532 | } |
| 533 | }; |
| 534 | |
| 535 | class DataflowSrcSafetyAnalysis |
| 536 | : public SrcSafetyAnalysis, |
| 537 | public DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState, |
| 538 | /*Backward=*/false, SrcStatePrinter> { |
| 539 | using DFParent = DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState, false, |
| 540 | SrcStatePrinter>; |
| 541 | friend DFParent; |
| 542 | |
| 543 | using SrcSafetyAnalysis::BC; |
| 544 | using SrcSafetyAnalysis::computeNext; |
| 545 | |
| 546 | public: |
| 547 | DataflowSrcSafetyAnalysis(BinaryFunction &BF, |
| 548 | MCPlusBuilder::AllocatorIdTy AllocId, |
| 549 | ArrayRef<MCPhysReg> RegsToTrackInstsFor) |
| 550 | : SrcSafetyAnalysis(BF, RegsToTrackInstsFor), DFParent(BF, AllocId) {} |
| 551 | |
| 552 | const SrcState &getStateBefore(const MCInst &Inst) const override { |
| 553 | return DFParent::getStateBefore(Point: Inst).get(); |
| 554 | } |
| 555 | |
| 556 | void run() override { |
| 557 | for (BinaryBasicBlock &BB : Func) { |
| 558 | if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) { |
| 559 | MCPhysReg CheckedReg = CheckerInfo->first; |
| 560 | MCInst &FirstInst = *CheckerInfo->second; |
| 561 | MCInst &LastInst = *BB.getLastNonPseudoInstr(); |
| 562 | LLVM_DEBUG({ |
| 563 | dbgs() << "Found pointer checking sequence in " << BB.getName() |
| 564 | << ":\n" ; |
| 565 | traceReg(BC, "Checked register" , CheckedReg); |
| 566 | traceInst(BC, "First instruction" , FirstInst); |
| 567 | traceInst(BC, "Last instruction" , LastInst); |
| 568 | }); |
| 569 | (void)CheckedReg; |
| 570 | (void)FirstInst; |
| 571 | assert(llvm::any_of(BB, [&](MCInst &I) { return &I == &FirstInst; }) && |
| 572 | "Data-flow analysis expects the checker not to cross BBs" ); |
| 573 | CheckerSequenceInfo[&LastInst] = *CheckerInfo; |
| 574 | } |
| 575 | } |
| 576 | DFParent::run(); |
| 577 | } |
| 578 | |
| 579 | protected: |
| 580 | void preflight() {} |
| 581 | |
| 582 | SrcState getStartingStateAtBB(const BinaryBasicBlock &BB) { |
| 583 | if (BB.isEntryPoint()) |
| 584 | return createEntryState(); |
| 585 | |
| 586 | return SrcState(); |
| 587 | } |
| 588 | |
| 589 | SrcState getStartingStateAtPoint(const MCInst &Point) { return SrcState(); } |
| 590 | |
| 591 | void doConfluence(SrcState &StateOut, const SrcState &StateIn) { |
| 592 | SrcStatePrinter P(BC); |
| 593 | LLVM_DEBUG({ |
| 594 | dbgs() << " DataflowSrcSafetyAnalysis::Confluence(\n" ; |
| 595 | dbgs() << " State 1: " ; |
| 596 | P.print(dbgs(), StateOut); |
| 597 | dbgs() << "\n" ; |
| 598 | dbgs() << " State 2: " ; |
| 599 | P.print(dbgs(), StateIn); |
| 600 | dbgs() << ")\n" ; |
| 601 | }); |
| 602 | |
| 603 | StateOut.merge(StateIn); |
| 604 | |
| 605 | LLVM_DEBUG({ |
| 606 | dbgs() << " merged state: " ; |
| 607 | P.print(dbgs(), StateOut); |
| 608 | dbgs() << "\n" ; |
| 609 | }); |
| 610 | } |
| 611 | |
| 612 | StringRef getAnnotationName() const { return "DataflowSrcSafetyAnalysis" ; } |
| 613 | }; |
| 614 | |
| 615 | // A simplified implementation of DataflowSrcSafetyAnalysis for functions |
| 616 | // lacking CFG information. |
| 617 | // |
| 618 | // Let assume the instructions can only be executed linearly unless there is |
| 619 | // a label to jump to - this should handle both directly jumping to a location |
| 620 | // encoded as an immediate operand of a branch instruction, as well as saving a |
| 621 | // branch destination somewhere and passing it to an indirect branch instruction |
| 622 | // later, provided no arithmetic is performed on the destination address: |
| 623 | // |
| 624 | // ; good: the destination is directly encoded into the branch instruction |
| 625 | // cbz x0, some_label |
| 626 | // |
| 627 | // ; good: the branch destination is first stored and then used as-is |
| 628 | // adr x1, some_label |
| 629 | // br x1 |
| 630 | // |
| 631 | // ; bad: some clever arithmetic is performed manually |
| 632 | // adr x1, some_label |
| 633 | // add x1, x1, #4 |
| 634 | // br x1 |
| 635 | // ... |
| 636 | // some_label: |
| 637 | // ; pessimistically reset the state as we are unsure where we came from |
| 638 | // ... |
| 639 | // ret |
| 640 | // JTI0: |
| 641 | // .byte some_label - Ltmp0 ; computing offsets using labels may probably |
| 642 | // work too, provided enough information is |
| 643 | // retained by the assembler and linker |
| 644 | // |
| 645 | // Then, a function can be split into a number of disjoint contiguous sequences |
| 646 | // of instructions without labels in between. These sequences can be processed |
| 647 | // the same way basic blocks are processed by data-flow analysis, assuming |
| 648 | // pessimistically that all registers are unsafe at the start of each sequence. |
| 649 | class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis { |
| 650 | BinaryFunction &BF; |
| 651 | MCPlusBuilder::AllocatorIdTy AllocId; |
| 652 | unsigned StateAnnotationIndex; |
| 653 | |
| 654 | void cleanStateAnnotations() { |
| 655 | for (auto &I : BF.instrs()) |
| 656 | BC.MIB->removeAnnotation(Inst&: I.second, Index: StateAnnotationIndex); |
| 657 | } |
| 658 | |
| 659 | /// Creates a state with all registers marked unsafe (not to be confused |
| 660 | /// with empty state). |
| 661 | SrcState createUnsafeState() const { |
| 662 | return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters()); |
| 663 | } |
| 664 | |
| 665 | public: |
| 666 | CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF, |
| 667 | MCPlusBuilder::AllocatorIdTy AllocId, |
| 668 | ArrayRef<MCPhysReg> RegsToTrackInstsFor) |
| 669 | : SrcSafetyAnalysis(BF, RegsToTrackInstsFor), BF(BF), AllocId(AllocId) { |
| 670 | StateAnnotationIndex = |
| 671 | BC.MIB->getOrCreateAnnotationIndex(Name: "CFGUnawareSrcSafetyAnalysis" ); |
| 672 | } |
| 673 | |
| 674 | void run() override { |
| 675 | SrcState S = createEntryState(); |
| 676 | for (auto &I : BF.instrs()) { |
| 677 | MCInst &Inst = I.second; |
| 678 | |
| 679 | // If there is a label before this instruction, it is possible that it |
| 680 | // can be jumped-to, thus conservatively resetting S. As an exception, |
| 681 | // let's ignore any labels at the beginning of the function, as at least |
| 682 | // one label is expected there. |
| 683 | if (BF.hasLabelAt(Offset: I.first) && &Inst != &BF.instrs().begin()->second) { |
| 684 | LLVM_DEBUG({ |
| 685 | traceInst(BC, "Due to label, resetting the state before" , Inst); |
| 686 | }); |
| 687 | S = createUnsafeState(); |
| 688 | } |
| 689 | |
| 690 | // Check if we need to remove an old annotation (this is the case if |
| 691 | // this is the second, detailed, run of the analysis). |
| 692 | if (BC.MIB->hasAnnotation(Inst, Index: StateAnnotationIndex)) |
| 693 | BC.MIB->removeAnnotation(Inst, Index: StateAnnotationIndex); |
| 694 | // Attach the state *before* this instruction executes. |
| 695 | BC.MIB->addAnnotation(Inst, Index: StateAnnotationIndex, Val: S, AllocatorId: AllocId); |
| 696 | |
| 697 | // Compute the state after this instruction executes. |
| 698 | S = computeNext(Point: Inst, Cur: S); |
| 699 | } |
| 700 | } |
| 701 | |
| 702 | const SrcState &getStateBefore(const MCInst &Inst) const override { |
| 703 | return BC.MIB->getAnnotationAs<SrcState>(Inst, Index: StateAnnotationIndex); |
| 704 | } |
| 705 | |
| 706 | ~CFGUnawareSrcSafetyAnalysis() { cleanStateAnnotations(); } |
| 707 | }; |
| 708 | |
| 709 | std::shared_ptr<SrcSafetyAnalysis> |
| 710 | SrcSafetyAnalysis::create(BinaryFunction &BF, |
| 711 | MCPlusBuilder::AllocatorIdTy AllocId, |
| 712 | ArrayRef<MCPhysReg> RegsToTrackInstsFor) { |
| 713 | if (BF.hasCFG()) |
| 714 | return std::make_shared<DataflowSrcSafetyAnalysis>(args&: BF, args&: AllocId, |
| 715 | args&: RegsToTrackInstsFor); |
| 716 | return std::make_shared<CFGUnawareSrcSafetyAnalysis>(args&: BF, args&: AllocId, |
| 717 | args&: RegsToTrackInstsFor); |
| 718 | } |
| 719 | |
| 720 | // This function could return PartialReport<T>, but currently T is always |
| 721 | // MCPhysReg, even though it is an implementation detail. |
| 722 | static PartialReport<MCPhysReg> make_generic_report(MCInstReference Location, |
| 723 | StringRef Text) { |
| 724 | auto Report = std::make_shared<GenericDiagnostic>(args&: Location, args&: Text); |
| 725 | return PartialReport<MCPhysReg>(Report, std::nullopt); |
| 726 | } |
| 727 | |
| 728 | template <typename T> |
| 729 | static PartialReport<T> make_gadget_report(const GadgetKind &Kind, |
| 730 | MCInstReference Location, |
| 731 | T RequestedDetails) { |
| 732 | auto Report = std::make_shared<GadgetDiagnostic>(args: Kind, args&: Location); |
| 733 | return PartialReport<T>(Report, RequestedDetails); |
| 734 | } |
| 735 | |
| 736 | static std::optional<PartialReport<MCPhysReg>> |
| 737 | shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst, |
| 738 | const SrcState &S) { |
| 739 | static const GadgetKind RetKind("non-protected ret found" ); |
| 740 | if (!BC.MIB->isReturn(Inst)) |
| 741 | return std::nullopt; |
| 742 | |
| 743 | bool IsAuthenticated = false; |
| 744 | std::optional<MCPhysReg> RetReg = |
| 745 | BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticatedInternally&: IsAuthenticated); |
| 746 | if (!RetReg) { |
| 747 | return make_generic_report( |
| 748 | Location: Inst, Text: "Warning: pac-ret analysis could not analyze this return " |
| 749 | "instruction" ); |
| 750 | } |
| 751 | if (IsAuthenticated) |
| 752 | return std::nullopt; |
| 753 | |
| 754 | LLVM_DEBUG({ |
| 755 | traceInst(BC, "Found RET inst" , Inst); |
| 756 | traceReg(BC, "RetReg" , *RetReg); |
| 757 | traceRegMask(BC, "SafeToDerefRegs" , S.SafeToDerefRegs); |
| 758 | }); |
| 759 | |
| 760 | if (S.SafeToDerefRegs[*RetReg]) |
| 761 | return std::nullopt; |
| 762 | |
| 763 | return make_gadget_report(Kind: RetKind, Location: Inst, RequestedDetails: *RetReg); |
| 764 | } |
| 765 | |
| 766 | static std::optional<PartialReport<MCPhysReg>> |
| 767 | shouldReportCallGadget(const BinaryContext &BC, const MCInstReference &Inst, |
| 768 | const SrcState &S) { |
| 769 | static const GadgetKind CallKind("non-protected call found" ); |
| 770 | if (!BC.MIB->isIndirectCall(Inst) && !BC.MIB->isIndirectBranch(Inst)) |
| 771 | return std::nullopt; |
| 772 | |
| 773 | bool IsAuthenticated = false; |
| 774 | MCPhysReg DestReg = |
| 775 | BC.MIB->getRegUsedAsIndirectBranchDest(Inst, IsAuthenticatedInternally&: IsAuthenticated); |
| 776 | if (IsAuthenticated) |
| 777 | return std::nullopt; |
| 778 | |
| 779 | assert(DestReg != BC.MIB->getNoRegister() && "Valid register expected" ); |
| 780 | LLVM_DEBUG({ |
| 781 | traceInst(BC, "Found call inst" , Inst); |
| 782 | traceReg(BC, "Call destination reg" , DestReg); |
| 783 | traceRegMask(BC, "SafeToDerefRegs" , S.SafeToDerefRegs); |
| 784 | }); |
| 785 | if (S.SafeToDerefRegs[DestReg]) |
| 786 | return std::nullopt; |
| 787 | |
| 788 | return make_gadget_report(Kind: CallKind, Location: Inst, RequestedDetails: DestReg); |
| 789 | } |
| 790 | |
| 791 | static std::optional<PartialReport<MCPhysReg>> |
| 792 | shouldReportSigningOracle(const BinaryContext &BC, const MCInstReference &Inst, |
| 793 | const SrcState &S) { |
| 794 | static const GadgetKind SigningOracleKind("signing oracle found" ); |
| 795 | |
| 796 | std::optional<MCPhysReg> SignedReg = BC.MIB->getSignedReg(Inst); |
| 797 | if (!SignedReg) |
| 798 | return std::nullopt; |
| 799 | |
| 800 | LLVM_DEBUG({ |
| 801 | traceInst(BC, "Found sign inst" , Inst); |
| 802 | traceReg(BC, "Signed reg" , *SignedReg); |
| 803 | traceRegMask(BC, "TrustedRegs" , S.TrustedRegs); |
| 804 | }); |
| 805 | if (S.TrustedRegs[*SignedReg]) |
| 806 | return std::nullopt; |
| 807 | |
| 808 | return make_gadget_report(Kind: SigningOracleKind, Location: Inst, RequestedDetails: *SignedReg); |
| 809 | } |
| 810 | |
| 811 | template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) { |
| 812 | if (BF.hasCFG()) { |
| 813 | for (BinaryBasicBlock &BB : BF) |
| 814 | for (int64_t I = 0, E = BB.size(); I < E; ++I) |
| 815 | Fn(MCInstInBBReference(&BB, I)); |
| 816 | } else { |
| 817 | for (auto I : BF.instrs()) |
| 818 | Fn(MCInstInBFReference(&BF, I.first)); |
| 819 | } |
| 820 | } |
| 821 | |
| 822 | static SmallVector<MCPhysReg> |
| 823 | collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) { |
| 824 | SmallSet<MCPhysReg, 4> RegsToTrack; |
| 825 | for (auto Report : Reports) |
| 826 | if (Report.RequestedDetails) |
| 827 | RegsToTrack.insert(V: *Report.RequestedDetails); |
| 828 | |
| 829 | return SmallVector<MCPhysReg>(RegsToTrack.begin(), RegsToTrack.end()); |
| 830 | } |
| 831 | |
| 832 | void FunctionAnalysisContext::findUnsafeUses( |
| 833 | SmallVector<PartialReport<MCPhysReg>> &Reports) { |
| 834 | auto Analysis = SrcSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: {}); |
| 835 | LLVM_DEBUG({ dbgs() << "Running src register safety analysis...\n" ; }); |
| 836 | Analysis->run(); |
| 837 | LLVM_DEBUG({ |
| 838 | dbgs() << "After src register safety analysis:\n" ; |
| 839 | BF.dump(); |
| 840 | }); |
| 841 | |
| 842 | iterateOverInstrs(BF, Fn: [&](MCInstReference Inst) { |
| 843 | const SrcState &S = Analysis->getStateBefore(Inst); |
| 844 | |
| 845 | // If non-empty state was never propagated from the entry basic block |
| 846 | // to Inst, assume it to be unreachable and report a warning. |
| 847 | if (S.empty()) { |
| 848 | Reports.push_back( |
| 849 | Elt: make_generic_report(Location: Inst, Text: "Warning: unreachable instruction found" )); |
| 850 | return; |
| 851 | } |
| 852 | |
| 853 | if (auto Report = shouldReportReturnGadget(BC, Inst, S)) |
| 854 | Reports.push_back(Elt: *Report); |
| 855 | |
| 856 | if (PacRetGadgetsOnly) |
| 857 | return; |
| 858 | |
| 859 | if (auto Report = shouldReportCallGadget(BC, Inst, S)) |
| 860 | Reports.push_back(Elt: *Report); |
| 861 | if (auto Report = shouldReportSigningOracle(BC, Inst, S)) |
| 862 | Reports.push_back(Elt: *Report); |
| 863 | }); |
| 864 | } |
| 865 | |
| 866 | void FunctionAnalysisContext::augmentUnsafeUseReports( |
| 867 | ArrayRef<PartialReport<MCPhysReg>> Reports) { |
| 868 | SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports); |
| 869 | // Re-compute the analysis with register tracking. |
| 870 | auto Analysis = SrcSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: RegsToTrack); |
| 871 | LLVM_DEBUG( |
| 872 | { dbgs() << "\nRunning detailed src register safety analysis...\n" ; }); |
| 873 | Analysis->run(); |
| 874 | LLVM_DEBUG({ |
| 875 | dbgs() << "After detailed src register safety analysis:\n" ; |
| 876 | BF.dump(); |
| 877 | }); |
| 878 | |
| 879 | // Augment gadget reports. |
| 880 | for (auto &Report : Reports) { |
| 881 | MCInstReference Location = Report.Issue->Location; |
| 882 | LLVM_DEBUG({ traceInst(BC, "Attaching clobbering info to" , Location); }); |
| 883 | assert(Report.RequestedDetails && |
| 884 | "Should be removed by handleSimpleReports" ); |
| 885 | auto DetailedInfo = |
| 886 | std::make_shared<ClobberingInfo>(args: Analysis->getLastClobberingInsts( |
| 887 | Inst: Location, BF, ClobberedReg: *Report.RequestedDetails)); |
| 888 | Result.Diagnostics.emplace_back(args: Report.Issue, args&: DetailedInfo); |
| 889 | } |
| 890 | } |
| 891 | |
| 892 | void FunctionAnalysisContext::handleSimpleReports( |
| 893 | SmallVector<PartialReport<MCPhysReg>> &Reports) { |
| 894 | // Before re-running the detailed analysis, process the reports which do not |
| 895 | // need any additional details to be attached. |
| 896 | for (auto &Report : Reports) { |
| 897 | if (!Report.RequestedDetails) |
| 898 | Result.Diagnostics.emplace_back(args&: Report.Issue, args: nullptr); |
| 899 | } |
| 900 | llvm::erase_if(C&: Reports, P: [](const auto &R) { return !R.RequestedDetails; }); |
| 901 | } |
| 902 | |
| 903 | void FunctionAnalysisContext::run() { |
| 904 | LLVM_DEBUG({ |
| 905 | dbgs() << "Analyzing function " << BF.getPrintName() |
| 906 | << ", AllocatorId = " << AllocatorId << "\n" ; |
| 907 | BF.dump(); |
| 908 | }); |
| 909 | |
| 910 | SmallVector<PartialReport<MCPhysReg>> UnsafeUses; |
| 911 | findUnsafeUses(Reports&: UnsafeUses); |
| 912 | handleSimpleReports(Reports&: UnsafeUses); |
| 913 | if (!UnsafeUses.empty()) |
| 914 | augmentUnsafeUseReports(Reports: UnsafeUses); |
| 915 | } |
| 916 | |
| 917 | void Analysis::runOnFunction(BinaryFunction &BF, |
| 918 | MCPlusBuilder::AllocatorIdTy AllocatorId) { |
| 919 | FunctionAnalysisContext FA(BF, AllocatorId, PacRetGadgetsOnly); |
| 920 | FA.run(); |
| 921 | |
| 922 | const FunctionAnalysisResult &FAR = FA.getResult(); |
| 923 | if (FAR.Diagnostics.empty()) |
| 924 | return; |
| 925 | |
| 926 | // `runOnFunction` is typically getting called from multiple threads in |
| 927 | // parallel. Therefore, use a lock to avoid data races when storing the |
| 928 | // result of the analysis in the `AnalysisResults` map. |
| 929 | { |
| 930 | std::lock_guard<std::mutex> Lock(AnalysisResultsMutex); |
| 931 | AnalysisResults[&BF] = FAR; |
| 932 | } |
| 933 | } |
| 934 | |
| 935 | static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB, |
| 936 | size_t StartIndex = 0, size_t EndIndex = -1) { |
| 937 | if (EndIndex == (size_t)-1) |
| 938 | EndIndex = BB->size() - 1; |
| 939 | const BinaryFunction *BF = BB->getFunction(); |
| 940 | for (unsigned I = StartIndex; I <= EndIndex; ++I) { |
| 941 | // FIXME: this assumes all instructions are 4 bytes in size. This is true |
| 942 | // for AArch64, but it might be good to extract this function so it can be |
| 943 | // used elsewhere and for other targets too. |
| 944 | uint64_t Address = BB->getOffset() + BF->getAddress() + 4 * I; |
| 945 | const MCInst &Inst = BB->getInstructionAtIndex(Index: I); |
| 946 | if (BC.MIB->isCFI(Inst)) |
| 947 | continue; |
| 948 | BC.printInstruction(OS&: outs(), Instruction: Inst, Offset: Address, Function: BF); |
| 949 | } |
| 950 | } |
| 951 | |
| 952 | static void reportFoundGadgetInSingleBBSingleRelatedInst( |
| 953 | raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst, |
| 954 | const MCInstReference Location) { |
| 955 | BinaryBasicBlock *BB = Location.getBasicBlock(); |
| 956 | assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent); |
| 957 | assert(Location.ParentKind == MCInstReference::BasicBlockParent); |
| 958 | MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef; |
| 959 | if (BB == RelatedInstBB.BB) { |
| 960 | OS << " This happens in the following basic block:\n" ; |
| 961 | printBB(BC, BB); |
| 962 | } |
| 963 | } |
| 964 | |
| 965 | void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC, |
| 966 | StringRef IssueKind) const { |
| 967 | BinaryFunction *BF = Location.getFunction(); |
| 968 | BinaryBasicBlock *BB = Location.getBasicBlock(); |
| 969 | |
| 970 | OS << "\nGS-PAUTH: " << IssueKind; |
| 971 | OS << " in function " << BF->getPrintName(); |
| 972 | if (BB) |
| 973 | OS << ", basic block " << BB->getName(); |
| 974 | OS << ", at address " << llvm::format(Fmt: "%x" , Vals: Location.getAddress()) << "\n" ; |
| 975 | OS << " The instruction is " ; |
| 976 | BC.printInstruction(OS, Instruction: Location, Offset: Location.getAddress(), Function: BF); |
| 977 | } |
| 978 | |
| 979 | void GadgetDiagnostic::generateReport(raw_ostream &OS, |
| 980 | const BinaryContext &BC) const { |
| 981 | printBasicInfo(OS, BC, IssueKind: Kind.getDescription()); |
| 982 | } |
| 983 | |
| 984 | static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location, |
| 985 | ArrayRef<MCInstReference> RelatedInstrs) { |
| 986 | const BinaryFunction &BF = *Location.getFunction(); |
| 987 | const BinaryContext &BC = BF.getBinaryContext(); |
| 988 | |
| 989 | // Sort by address to ensure output is deterministic. |
| 990 | SmallVector<MCInstReference> RI(RelatedInstrs); |
| 991 | llvm::sort(C&: RI, Comp: [](const MCInstReference &A, const MCInstReference &B) { |
| 992 | return A.getAddress() < B.getAddress(); |
| 993 | }); |
| 994 | for (unsigned I = 0; I < RI.size(); ++I) { |
| 995 | MCInstReference InstRef = RI[I]; |
| 996 | OS << " " << (I + 1) << ". " ; |
| 997 | BC.printInstruction(OS, Instruction: InstRef, Offset: InstRef.getAddress(), Function: &BF); |
| 998 | }; |
| 999 | if (RelatedInstrs.size() == 1) { |
| 1000 | const MCInstReference RelatedInst = RelatedInstrs[0]; |
| 1001 | // Printing the details for the MCInstReference::FunctionParent case |
| 1002 | // is not implemented not to overcomplicate the code, as most functions |
| 1003 | // are expected to have CFG information. |
| 1004 | if (RelatedInst.ParentKind == MCInstReference::BasicBlockParent) |
| 1005 | reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst, |
| 1006 | Location); |
| 1007 | } |
| 1008 | } |
| 1009 | |
| 1010 | void ClobberingInfo::print(raw_ostream &OS, |
| 1011 | const MCInstReference Location) const { |
| 1012 | OS << " The " << ClobberingInstrs.size() |
| 1013 | << " instructions that write to the affected registers after any " |
| 1014 | "authentication are:\n" ; |
| 1015 | printRelatedInstrs(OS, Location, RelatedInstrs: ClobberingInstrs); |
| 1016 | } |
| 1017 | |
| 1018 | void GenericDiagnostic::generateReport(raw_ostream &OS, |
| 1019 | const BinaryContext &BC) const { |
| 1020 | printBasicInfo(OS, BC, IssueKind: Text); |
| 1021 | } |
| 1022 | |
| 1023 | Error Analysis::runOnFunctions(BinaryContext &BC) { |
| 1024 | ParallelUtilities::WorkFuncWithAllocTy WorkFun = |
| 1025 | [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) { |
| 1026 | runOnFunction(BF, AllocatorId); |
| 1027 | }; |
| 1028 | |
| 1029 | ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { |
| 1030 | return false; |
| 1031 | }; |
| 1032 | |
| 1033 | ParallelUtilities::runOnEachFunctionWithUniqueAllocId( |
| 1034 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFunction: WorkFun, |
| 1035 | SkipPredicate: SkipFunc, LogName: "PAuthGadgetScanner" ); |
| 1036 | |
| 1037 | for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { |
| 1038 | if (!AnalysisResults.count(x: BF)) |
| 1039 | continue; |
| 1040 | for (const FinalReport &R : AnalysisResults[BF].Diagnostics) { |
| 1041 | R.Issue->generateReport(OS&: outs(), BC); |
| 1042 | if (R.Details) |
| 1043 | R.Details->print(OS&: outs(), Location: R.Issue->Location); |
| 1044 | } |
| 1045 | } |
| 1046 | return Error::success(); |
| 1047 | } |
| 1048 | |
| 1049 | } // namespace PAuthGadgetScanner |
| 1050 | } // namespace bolt |
| 1051 | } // namespace llvm |
| 1052 | |