1//===- bolt/Passes/PAuthGadgetScanner.cpp ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a pass that looks for any AArch64 return instructions
10// that may not be protected by PAuth authentication instructions when needed.
11//
12//===----------------------------------------------------------------------===//
13
14#include "bolt/Passes/PAuthGadgetScanner.h"
15#include "bolt/Core/ParallelUtilities.h"
16#include "bolt/Passes/DataflowAnalysis.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/Support/Format.h"
21#include <memory>
22
23#define DEBUG_TYPE "bolt-pauth-scanner"
24
25namespace llvm {
26namespace bolt {
27
28raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) {
29 OS << "MCInstBBRef<";
30 if (Ref.BB == nullptr)
31 OS << "BB:(null)";
32 else
33 OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex;
34 OS << ">";
35 return OS;
36}
37
38raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) {
39 OS << "MCInstBFRef<";
40 if (Ref.BF == nullptr)
41 OS << "BF:(null)";
42 else
43 OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset();
44 OS << ">";
45 return OS;
46}
47
48raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
49 switch (Ref.ParentKind) {
50 case MCInstReference::BasicBlockParent:
51 OS << Ref.U.BBRef;
52 return OS;
53 case MCInstReference::FunctionParent:
54 OS << Ref.U.BFRef;
55 return OS;
56 }
57 llvm_unreachable("");
58}
59
60namespace PAuthGadgetScanner {
61
62[[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
63 const MCInst &MI) {
64 dbgs() << " " << Label << ": ";
65 BC.printInstruction(OS&: dbgs(), Instruction: MI);
66}
67
68[[maybe_unused]] static void traceReg(const BinaryContext &BC, StringRef Label,
69 MCPhysReg Reg) {
70 dbgs() << " " << Label << ": ";
71 if (Reg == BC.MIB->getNoRegister())
72 dbgs() << "(none)";
73 else
74 dbgs() << BC.MRI->getName(RegNo: Reg);
75 dbgs() << "\n";
76}
77
78[[maybe_unused]] static void traceRegMask(const BinaryContext &BC,
79 StringRef Label, BitVector Mask) {
80 dbgs() << " " << Label << ": ";
81 RegStatePrinter(BC).print(OS&: dbgs(), State: Mask);
82 dbgs() << "\n";
83}
84
85// Iterates over BinaryFunction's instructions like a range-based for loop:
86//
87// iterateOverInstrs(BF, [&](MCInstReference Inst) {
88// // loop body
89// });
90template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
91 if (BF.hasCFG()) {
92 for (BinaryBasicBlock &BB : BF)
93 for (int64_t I = 0, E = BB.size(); I < E; ++I)
94 Fn(MCInstInBBReference(&BB, I));
95 } else {
96 for (auto I : BF.instrs())
97 Fn(MCInstInBFReference(&BF, I.first));
98 }
99}
100
101// This class represents mapping from a set of arbitrary physical registers to
102// consecutive array indexes.
103class TrackedRegisters {
104 static constexpr uint16_t NoIndex = -1;
105 const std::vector<MCPhysReg> Registers;
106 std::vector<uint16_t> RegToIndexMapping;
107
108 static size_t getMappingSize(ArrayRef<MCPhysReg> RegsToTrack) {
109 if (RegsToTrack.empty())
110 return 0;
111 return 1 + *llvm::max_element(Range&: RegsToTrack);
112 }
113
114public:
115 TrackedRegisters(ArrayRef<MCPhysReg> RegsToTrack)
116 : Registers(RegsToTrack),
117 RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) {
118 for (unsigned I = 0; I < RegsToTrack.size(); ++I)
119 RegToIndexMapping[RegsToTrack[I]] = I;
120 }
121
122 ArrayRef<MCPhysReg> getRegisters() const { return Registers; }
123
124 size_t getNumTrackedRegisters() const { return Registers.size(); }
125
126 bool empty() const { return Registers.empty(); }
127
128 bool isTracked(MCPhysReg Reg) const {
129 bool IsTracked = (unsigned)Reg < RegToIndexMapping.size() &&
130 RegToIndexMapping[Reg] != NoIndex;
131 assert(IsTracked == llvm::is_contained(Registers, Reg));
132 return IsTracked;
133 }
134
135 unsigned getIndex(MCPhysReg Reg) const {
136 assert(isTracked(Reg) && "Register is not tracked");
137 return RegToIndexMapping[Reg];
138 }
139};
140
141// The security property that is checked is:
142// When a register is used as the address to jump to in a return instruction,
143// that register must be safe-to-dereference. It must either
144// (a) be safe-to-dereference at function entry and never be changed within this
145// function, i.e. have the same value as when the function started, or
146// (b) the last write to the register must be by an authentication instruction.
147
148// This property is checked by using dataflow analysis to keep track of which
149// registers have been written (def-ed), since last authenticated. For pac-ret,
150// any return instruction using a register which is not safe-to-dereference is
151// a gadget to be reported. For PAuthABI, probably at least any indirect control
152// flow using such a register should be reported.
153
154// Furthermore, when producing a diagnostic for a found non-pac-ret protected
155// return, the analysis also lists the last instructions that wrote to the
156// register used in the return instruction.
157// The total set of registers used in return instructions in a given function is
158// small. It almost always is just `X30`.
159// In order to reduce the memory consumption of storing this additional state
160// during the dataflow analysis, this is computed by running the dataflow
161// analysis twice:
162// 1. In the first run, the dataflow analysis only keeps track of the security
163// property: i.e. which registers have been overwritten since the last
164// time they've been authenticated.
165// 2. If the first run finds any return instructions using a register last
166// written by a non-authenticating instruction, the dataflow analysis will
167// be run a second time. The first run will return which registers are used
168// in the gadgets to be reported. This information is used in the second run
169// to also track which instructions last wrote to those registers.
170
171typedef SmallPtrSet<const MCInst *, 4> SetOfRelatedInsts;
172
173/// A state representing which registers are safe to use by an instruction
174/// at a given program point.
175///
176/// To simplify reasoning, let's stick with the following approach:
177/// * when state is updated by the data-flow analysis, the sub-, super- and
178/// overlapping registers are marked as needed
179/// * when the particular instruction is checked if it represents a gadget,
180/// the specific bit of BitVector should be usable to answer this.
181///
182/// For example, on AArch64:
183/// * An AUTIZA X0 instruction marks both X0 and W0 (as well as W0_HI) as
184/// safe-to-dereference. It does not change the state of X0_X1, for example,
185/// as super-registers partially retain their old, unsafe values.
186/// * LDR X1, [X0] marks as unsafe both X1 itself and anything it overlaps
187/// with: W1, W1_HI, X0_X1 and so on.
188/// * RET (which is implicitly RET X30) is a protected return if and only if
189/// X30 is safe-to-dereference - the state computed for sub- and
190/// super-registers is not inspected.
191struct SrcState {
192 /// A BitVector containing the registers that are either authenticated
193 /// (assuming failed authentication is permitted to produce an invalid
194 /// address, provided it generates an error on memory access) or whose
195 /// value is known not to be attacker-controlled under Pointer Authentication
196 /// threat model. The registers in this set are either
197 /// * not clobbered since being authenticated, or
198 /// * trusted at function entry and were not clobbered yet, or
199 /// * contain a safely materialized address.
200 BitVector SafeToDerefRegs;
201 /// A BitVector containing the registers that are either authenticated
202 /// *successfully* or whose value is known not to be attacker-controlled
203 /// under Pointer Authentication threat model.
204 /// The registers in this set are either
205 /// * authenticated and then checked to be authenticated successfully
206 /// (and not clobbered since then), or
207 /// * trusted at function entry and were not clobbered yet, or
208 /// * contain a safely materialized address.
209 BitVector TrustedRegs;
210 /// A vector of sets, only used in the second data flow run.
211 /// Each element in the vector represents one of the registers for which we
212 /// track the set of last instructions that wrote to this register. For
213 /// pac-ret analysis, the expectation is that almost all return instructions
214 /// only use register `X30`, and therefore, this vector will probably have
215 /// length 1 in the second run.
216 std::vector<SetOfRelatedInsts> LastInstWritingReg;
217
218 /// Construct an empty state.
219 SrcState() {}
220
221 SrcState(unsigned NumRegs, unsigned NumRegsToTrack)
222 : SafeToDerefRegs(NumRegs), TrustedRegs(NumRegs),
223 LastInstWritingReg(NumRegsToTrack) {}
224
225 SrcState &merge(const SrcState &StateIn) {
226 if (StateIn.empty())
227 return *this;
228 if (empty())
229 return (*this = StateIn);
230
231 SafeToDerefRegs &= StateIn.SafeToDerefRegs;
232 TrustedRegs &= StateIn.TrustedRegs;
233 for (unsigned I = 0; I < LastInstWritingReg.size(); ++I)
234 for (const MCInst *J : StateIn.LastInstWritingReg[I])
235 LastInstWritingReg[I].insert(Ptr: J);
236 return *this;
237 }
238
239 /// Returns true if this object does not store state of any registers -
240 /// neither safe, nor unsafe ones.
241 bool empty() const { return SafeToDerefRegs.empty(); }
242
243 bool operator==(const SrcState &RHS) const {
244 return SafeToDerefRegs == RHS.SafeToDerefRegs &&
245 TrustedRegs == RHS.TrustedRegs &&
246 LastInstWritingReg == RHS.LastInstWritingReg;
247 }
248 bool operator!=(const SrcState &RHS) const { return !((*this) == RHS); }
249};
250
251static void printInstsShort(raw_ostream &OS,
252 ArrayRef<SetOfRelatedInsts> Insts) {
253 OS << "Insts: ";
254 for (unsigned I = 0; I < Insts.size(); ++I) {
255 auto &Set = Insts[I];
256 OS << "[" << I << "](";
257 for (const MCInst *MCInstP : Set)
258 OS << MCInstP << " ";
259 OS << ")";
260 }
261}
262
263static raw_ostream &operator<<(raw_ostream &OS, const SrcState &S) {
264 OS << "src-state<";
265 if (S.empty()) {
266 OS << "empty";
267 } else {
268 OS << "SafeToDerefRegs: " << S.SafeToDerefRegs << ", ";
269 OS << "TrustedRegs: " << S.TrustedRegs << ", ";
270 printInstsShort(OS, Insts: S.LastInstWritingReg);
271 }
272 OS << ">";
273 return OS;
274}
275
276class SrcStatePrinter {
277public:
278 void print(raw_ostream &OS, const SrcState &State) const;
279 explicit SrcStatePrinter(const BinaryContext &BC) : BC(BC) {}
280
281private:
282 const BinaryContext &BC;
283};
284
285void SrcStatePrinter::print(raw_ostream &OS, const SrcState &S) const {
286 RegStatePrinter RegStatePrinter(BC);
287 OS << "src-state<";
288 if (S.empty()) {
289 assert(S.SafeToDerefRegs.empty());
290 assert(S.TrustedRegs.empty());
291 assert(S.LastInstWritingReg.empty());
292 OS << "empty";
293 } else {
294 OS << "SafeToDerefRegs: ";
295 RegStatePrinter.print(OS, State: S.SafeToDerefRegs);
296 OS << ", TrustedRegs: ";
297 RegStatePrinter.print(OS, State: S.TrustedRegs);
298 OS << ", ";
299 printInstsShort(OS, Insts: S.LastInstWritingReg);
300 }
301 OS << ">";
302}
303
304/// Computes which registers are safe to be used by control flow and signing
305/// instructions.
306///
307/// This is the base class for two implementations: a dataflow-based analysis
308/// which is intended to be used for most functions and a simplified CFG-unaware
309/// version for functions without reconstructed CFG.
310class SrcSafetyAnalysis {
311public:
312 SrcSafetyAnalysis(BinaryFunction &BF, ArrayRef<MCPhysReg> RegsToTrackInstsFor)
313 : BC(BF.getBinaryContext()), NumRegs(BC.MRI->getNumRegs()),
314 RegsToTrackInstsFor(RegsToTrackInstsFor) {}
315
316 virtual ~SrcSafetyAnalysis() {}
317
318 static std::shared_ptr<SrcSafetyAnalysis>
319 create(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId,
320 ArrayRef<MCPhysReg> RegsToTrackInstsFor);
321
322 virtual void run() = 0;
323 virtual const SrcState &getStateBefore(const MCInst &Inst) const = 0;
324
325protected:
326 BinaryContext &BC;
327 const unsigned NumRegs;
328 /// RegToTrackInstsFor is the set of registers for which the dataflow analysis
329 /// must compute which the last set of instructions writing to it are.
330 const TrackedRegisters RegsToTrackInstsFor;
331 /// Stores information about the detected instruction sequences emitted to
332 /// check an authenticated pointer. Specifically, if such sequence is detected
333 /// in a basic block, it maps the last instruction of that basic block to
334 /// (CheckedRegister, FirstInstOfTheSequence) pair, see the description of
335 /// MCPlusBuilder::getAuthCheckedReg(BB) method.
336 ///
337 /// As the detection of such sequences requires iterating over the adjacent
338 /// instructions, it should be done before calling computeNext(), which
339 /// operates on separate instructions.
340 DenseMap<const MCInst *, std::pair<MCPhysReg, const MCInst *>>
341 CheckerSequenceInfo;
342
343 SetOfRelatedInsts &lastWritingInsts(SrcState &S, MCPhysReg Reg) const {
344 unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
345 return S.LastInstWritingReg[Index];
346 }
347 const SetOfRelatedInsts &lastWritingInsts(const SrcState &S,
348 MCPhysReg Reg) const {
349 unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
350 return S.LastInstWritingReg[Index];
351 }
352
353 SrcState createEntryState() {
354 SrcState S(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
355 for (MCPhysReg Reg : BC.MIB->getTrustedLiveInRegs())
356 S.TrustedRegs |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/true);
357 S.SafeToDerefRegs = S.TrustedRegs;
358 return S;
359 }
360
361 /// Computes a reasonably pessimistic estimation of the register state when
362 /// the previous instruction is not known for sure. Takes the set of registers
363 /// which are trusted at function entry and removes all registers that can be
364 /// clobbered inside this function.
365 SrcState computePessimisticState(BinaryFunction &BF) {
366 BitVector ClobberedRegs(NumRegs);
367 iterateOverInstrs(BF, Fn: [&](MCInstReference Inst) {
368 BC.MIB->getClobberedRegs(Inst, Regs&: ClobberedRegs);
369
370 // If this is a call instruction, no register is safe anymore, unless
371 // it is a tail call. Ignore tail calls for the purpose of estimating the
372 // worst-case scenario, assuming no instructions are executed in the
373 // caller after this point anyway.
374 if (BC.MIB->isCall(Inst) && !BC.MIB->isTailCall(Inst))
375 ClobberedRegs.set();
376 });
377
378 SrcState S = createEntryState();
379 S.SafeToDerefRegs.reset(RHS: ClobberedRegs);
380 S.TrustedRegs.reset(RHS: ClobberedRegs);
381 return S;
382 }
383
384 BitVector getClobberedRegs(const MCInst &Point) const {
385 BitVector Clobbered(NumRegs);
386 // Assume a call can clobber all registers, including callee-saved
387 // registers. There's a good chance that callee-saved registers will be
388 // saved on the stack at some point during execution of the callee.
389 // Therefore they should also be considered as potentially modified by an
390 // attacker/written to.
391 // Also, not all functions may respect the AAPCS ABI rules about
392 // caller/callee-saved registers.
393 if (BC.MIB->isCall(Inst: Point))
394 Clobbered.set();
395 else
396 BC.MIB->getClobberedRegs(Inst: Point, Regs&: Clobbered);
397 return Clobbered;
398 }
399
400 // Returns all registers that can be treated as if they are written by an
401 // authentication instruction.
402 SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
403 const SrcState &Cur) const {
404 SmallVector<MCPhysReg> Regs;
405
406 // A signed pointer can be authenticated, or
407 bool Dummy = false;
408 if (auto AutReg = BC.MIB->getWrittenAuthenticatedReg(Inst: Point, IsChecked&: Dummy))
409 Regs.push_back(Elt: *AutReg);
410
411 // ... a safe address can be materialized, or
412 if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Inst: Point))
413 Regs.push_back(Elt: *NewAddrReg);
414
415 // ... an address can be updated in a safe manner, producing the result
416 // which is as trusted as the input address.
417 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst: Point)) {
418 if (Cur.SafeToDerefRegs[DstAndSrc->second])
419 Regs.push_back(Elt: DstAndSrc->first);
420 }
421
422 return Regs;
423 }
424
425 // Returns all registers made trusted by this instruction.
426 SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point,
427 const SrcState &Cur) const {
428 SmallVector<MCPhysReg> Regs;
429
430 // An authenticated pointer can be checked, or
431 std::optional<MCPhysReg> CheckedReg =
432 BC.MIB->getAuthCheckedReg(Inst: Point, /*MayOverwrite=*/false);
433 if (CheckedReg && Cur.SafeToDerefRegs[*CheckedReg])
434 Regs.push_back(Elt: *CheckedReg);
435
436 // ... a pointer can be authenticated by an instruction that always checks
437 // the pointer, or
438 bool IsChecked = false;
439 std::optional<MCPhysReg> AutReg =
440 BC.MIB->getWrittenAuthenticatedReg(Inst: Point, IsChecked);
441 if (AutReg && IsChecked)
442 Regs.push_back(Elt: *AutReg);
443
444 if (CheckerSequenceInfo.contains(Val: &Point)) {
445 MCPhysReg CheckedReg;
446 const MCInst *FirstCheckerInst;
447 std::tie(args&: CheckedReg, args&: FirstCheckerInst) = CheckerSequenceInfo.at(Val: &Point);
448
449 // FirstCheckerInst should belong to the same basic block (see the
450 // assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
451 // deterministically processed a few steps before this instruction.
452 const SrcState &StateBeforeChecker = getStateBefore(Inst: *FirstCheckerInst);
453 if (StateBeforeChecker.SafeToDerefRegs[CheckedReg])
454 Regs.push_back(Elt: CheckedReg);
455 }
456
457 // ... a safe address can be materialized, or
458 if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Inst: Point))
459 Regs.push_back(Elt: *NewAddrReg);
460
461 // ... an address can be updated in a safe manner, producing the result
462 // which is as trusted as the input address.
463 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst: Point)) {
464 if (Cur.TrustedRegs[DstAndSrc->second])
465 Regs.push_back(Elt: DstAndSrc->first);
466 }
467
468 return Regs;
469 }
470
471 SrcState computeNext(const MCInst &Point, const SrcState &Cur) {
472 if (BC.MIB->isCFI(Inst: Point))
473 return Cur;
474
475 SrcStatePrinter P(BC);
476 LLVM_DEBUG({
477 dbgs() << " SrcSafetyAnalysis::ComputeNext(";
478 BC.InstPrinter->printInst(&Point, 0, "", *BC.STI, dbgs());
479 dbgs() << ", ";
480 P.print(dbgs(), Cur);
481 dbgs() << ")\n";
482 });
483
484 // If this instruction is reachable, a non-empty state will be propagated
485 // to it from the entry basic block sooner or later. Until then, it is both
486 // more efficient and easier to reason about to skip computeNext().
487 if (Cur.empty()) {
488 LLVM_DEBUG(
489 { dbgs() << "Skipping computeNext(Point, Cur) as Cur is empty.\n"; });
490 return SrcState();
491 }
492
493 // First, compute various properties of the instruction, taking the state
494 // before its execution into account, if necessary.
495
496 BitVector Clobbered = getClobberedRegs(Point);
497 SmallVector<MCPhysReg> NewSafeToDerefRegs =
498 getRegsMadeSafeToDeref(Point, Cur);
499 SmallVector<MCPhysReg> NewTrustedRegs = getRegsMadeTrusted(Point, Cur);
500
501 // Ideally, being trusted is a strictly stronger property than being
502 // safe-to-dereference. To simplify the computation of Next state, enforce
503 // this for NewSafeToDerefRegs and NewTrustedRegs. Additionally, this
504 // fixes the properly for "cumulative" register states in tricky cases
505 // like the following:
506 //
507 // ; LR is safe to dereference here
508 // mov x16, x30 ; start of the sequence, LR is s-t-d right before
509 // xpaclri ; clobbers LR, LR is not safe anymore
510 // cmp x30, x16
511 // b.eq 1f ; end of the sequence: LR is marked as trusted
512 // brk 0x1234
513 // 1:
514 // ; at this point LR would be marked as trusted,
515 // ; but not safe-to-dereference
516 //
517 for (auto TrustedReg : NewTrustedRegs) {
518 if (!is_contained(Range&: NewSafeToDerefRegs, Element: TrustedReg))
519 NewSafeToDerefRegs.push_back(Elt: TrustedReg);
520 }
521
522 // Then, compute the state after this instruction is executed.
523 SrcState Next = Cur;
524
525 Next.SafeToDerefRegs.reset(RHS: Clobbered);
526 Next.TrustedRegs.reset(RHS: Clobbered);
527 // Keep track of this instruction if it writes to any of the registers we
528 // need to track that for:
529 for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters())
530 if (Clobbered[Reg])
531 lastWritingInsts(S&: Next, Reg) = {&Point};
532
533 // After accounting for clobbered registers in general, override the state
534 // according to authentication and other *special cases* of clobbering.
535
536 // The sub-registers are also safe-to-dereference now, but not their
537 // super-registers (as they retain untrusted register units).
538 BitVector NewSafeSubregs(NumRegs);
539 for (MCPhysReg SafeReg : NewSafeToDerefRegs)
540 NewSafeSubregs |= BC.MIB->getAliases(Reg: SafeReg, /*OnlySmaller=*/true);
541 for (MCPhysReg Reg : NewSafeSubregs.set_bits()) {
542 Next.SafeToDerefRegs.set(Reg);
543 if (RegsToTrackInstsFor.isTracked(Reg))
544 lastWritingInsts(S&: Next, Reg).clear();
545 }
546
547 // Process new trusted registers.
548 for (MCPhysReg TrustedReg : NewTrustedRegs)
549 Next.TrustedRegs |= BC.MIB->getAliases(Reg: TrustedReg, /*OnlySmaller=*/true);
550
551 LLVM_DEBUG({
552 dbgs() << " .. result: (";
553 P.print(dbgs(), Next);
554 dbgs() << ")\n";
555 });
556
557 return Next;
558 }
559
560public:
561 std::vector<MCInstReference>
562 getLastClobberingInsts(const MCInst &Inst, BinaryFunction &BF,
563 MCPhysReg ClobberedReg) const {
564 const SrcState &S = getStateBefore(Inst);
565
566 std::vector<MCInstReference> Result;
567 for (const MCInst *Inst : lastWritingInsts(S, Reg: ClobberedReg)) {
568 MCInstReference Ref = MCInstReference::get(Inst, BF);
569 assert(Ref && "Expected Inst to be found");
570 Result.push_back(x: Ref);
571 }
572 return Result;
573 }
574};
575
576class DataflowSrcSafetyAnalysis
577 : public SrcSafetyAnalysis,
578 public DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState,
579 /*Backward=*/false, SrcStatePrinter> {
580 using DFParent = DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState, false,
581 SrcStatePrinter>;
582 friend DFParent;
583
584 using SrcSafetyAnalysis::BC;
585 using SrcSafetyAnalysis::computeNext;
586
587 // Pessimistic initial state for basic blocks without any predecessors
588 // (not needed for most functions, thus initialized lazily).
589 SrcState PessimisticState;
590
591public:
592 DataflowSrcSafetyAnalysis(BinaryFunction &BF,
593 MCPlusBuilder::AllocatorIdTy AllocId,
594 ArrayRef<MCPhysReg> RegsToTrackInstsFor)
595 : SrcSafetyAnalysis(BF, RegsToTrackInstsFor), DFParent(BF, AllocId) {}
596
597 const SrcState &getStateBefore(const MCInst &Inst) const override {
598 return DFParent::getStateBefore(Point: Inst).get();
599 }
600
601 void run() override {
602 for (BinaryBasicBlock &BB : Func) {
603 if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) {
604 MCPhysReg CheckedReg = CheckerInfo->first;
605 MCInst &FirstInst = *CheckerInfo->second;
606 MCInst &LastInst = *BB.getLastNonPseudoInstr();
607 LLVM_DEBUG({
608 dbgs() << "Found pointer checking sequence in " << BB.getName()
609 << ":\n";
610 traceReg(BC, "Checked register", CheckedReg);
611 traceInst(BC, "First instruction", FirstInst);
612 traceInst(BC, "Last instruction", LastInst);
613 });
614 (void)CheckedReg;
615 (void)FirstInst;
616 assert(llvm::any_of(BB, [&](MCInst &I) { return &I == &FirstInst; }) &&
617 "Data-flow analysis expects the checker not to cross BBs");
618 CheckerSequenceInfo[&LastInst] = *CheckerInfo;
619 }
620 }
621 DFParent::run();
622 }
623
624protected:
625 void preflight() {}
626
627 SrcState getStartingStateAtBB(const BinaryBasicBlock &BB) {
628 if (BB.isEntryPoint())
629 return createEntryState();
630
631 // If a basic block without any predecessors is found in an optimized code,
632 // this likely means that some CFG edges were not detected. Pessimistically
633 // assume any register that can ever be clobbered in this function to be
634 // unsafe before this basic block.
635 // Warn about this fact in FunctionAnalysis::findUnsafeUses(), as it likely
636 // means imprecise CFG information.
637 if (BB.pred_empty()) {
638 if (PessimisticState.empty())
639 PessimisticState = computePessimisticState(BF&: *BB.getParent());
640 return PessimisticState;
641 }
642
643 return SrcState();
644 }
645
646 SrcState getStartingStateAtPoint(const MCInst &Point) { return SrcState(); }
647
648 void doConfluence(SrcState &StateOut, const SrcState &StateIn) {
649 SrcStatePrinter P(BC);
650 LLVM_DEBUG({
651 dbgs() << " DataflowSrcSafetyAnalysis::Confluence(\n";
652 dbgs() << " State 1: ";
653 P.print(dbgs(), StateOut);
654 dbgs() << "\n";
655 dbgs() << " State 2: ";
656 P.print(dbgs(), StateIn);
657 dbgs() << ")\n";
658 });
659
660 StateOut.merge(StateIn);
661
662 LLVM_DEBUG({
663 dbgs() << " merged state: ";
664 P.print(dbgs(), StateOut);
665 dbgs() << "\n";
666 });
667 }
668
669 StringRef getAnnotationName() const { return "DataflowSrcSafetyAnalysis"; }
670};
671
672/// A helper base class for implementing a simplified counterpart of a dataflow
673/// analysis for functions without CFG information.
674template <typename StateTy> class CFGUnawareAnalysis {
675 BinaryContext &BC;
676 BinaryFunction &BF;
677 MCPlusBuilder::AllocatorIdTy AllocId;
678 unsigned StateAnnotationIndex;
679
680 void cleanStateAnnotations() {
681 for (auto &I : BF.instrs())
682 BC.MIB->removeAnnotation(Inst&: I.second, Index: StateAnnotationIndex);
683 }
684
685protected:
686 CFGUnawareAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId,
687 StringRef AnnotationName)
688 : BC(BF.getBinaryContext()), BF(BF), AllocId(AllocId) {
689 StateAnnotationIndex = BC.MIB->getOrCreateAnnotationIndex(Name: AnnotationName);
690 }
691
692 void setState(MCInst &Inst, const StateTy &S) {
693 // Check if we need to remove an old annotation (this is the case if
694 // this is the second, detailed run of the analysis).
695 if (BC.MIB->hasAnnotation(Inst, Index: StateAnnotationIndex))
696 BC.MIB->removeAnnotation(Inst, Index: StateAnnotationIndex);
697 // Attach the state.
698 BC.MIB->addAnnotation(Inst, StateAnnotationIndex, S, AllocId);
699 }
700
701 const StateTy &getState(const MCInst &Inst) const {
702 return BC.MIB->getAnnotationAs<StateTy>(Inst, StateAnnotationIndex);
703 }
704
705 virtual ~CFGUnawareAnalysis() { cleanStateAnnotations(); }
706};
707
708// A simplified implementation of DataflowSrcSafetyAnalysis for functions
709// lacking CFG information.
710//
// Let's assume the instructions can only be executed linearly unless there is
712// a label to jump to - this should handle both directly jumping to a location
713// encoded as an immediate operand of a branch instruction, as well as saving a
714// branch destination somewhere and passing it to an indirect branch instruction
715// later, provided no arithmetic is performed on the destination address:
716//
717// ; good: the destination is directly encoded into the branch instruction
718// cbz x0, some_label
719//
720// ; good: the branch destination is first stored and then used as-is
721// adr x1, some_label
722// br x1
723//
724// ; bad: some clever arithmetic is performed manually
725// adr x1, some_label
726// add x1, x1, #4
727// br x1
728// ...
729// some_label:
730// ; pessimistically reset the state as we are unsure where we came from
731// ...
732// ret
733// JTI0:
734// .byte some_label - Ltmp0 ; computing offsets using labels may probably
735// work too, provided enough information is
736// retained by the assembler and linker
737//
738// Then, a function can be split into a number of disjoint contiguous sequences
739// of instructions without labels in between. These sequences can be processed
740// the same way basic blocks are processed by data-flow analysis, with the same
741// pessimistic estimation of the initial state at the start of each sequence
742// (except the first instruction of the function).
743class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
744 public CFGUnawareAnalysis<SrcState> {
745 using SrcSafetyAnalysis::BC;
746 BinaryFunction &BF;
747
748public:
749 CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
750 MCPlusBuilder::AllocatorIdTy AllocId,
751 ArrayRef<MCPhysReg> RegsToTrackInstsFor)
752 : SrcSafetyAnalysis(BF, RegsToTrackInstsFor),
753 CFGUnawareAnalysis(BF, AllocId, "CFGUnawareSrcSafetyAnalysis"), BF(BF) {
754 }
755
756 void run() override {
757 const SrcState DefaultState = computePessimisticState(BF);
758 SrcState S = createEntryState();
759 for (auto &I : BF.instrs()) {
760 MCInst &Inst = I.second;
761 if (BC.MIB->isCFI(Inst))
762 continue;
763
764 // If there is a label before this instruction, it is possible that it
765 // can be jumped-to, thus conservatively resetting S. As an exception,
766 // let's ignore any labels at the beginning of the function, as at least
767 // one label is expected there.
768 if (BF.hasLabelAt(Offset: I.first) && &Inst != &BF.instrs().begin()->second) {
769 LLVM_DEBUG({
770 traceInst(BC, "Due to label, resetting the state before", Inst);
771 });
772 S = DefaultState;
773 }
774
775 // Attach the state *before* this instruction executes.
776 setState(Inst, S);
777
778 // Compute the state after this instruction executes.
779 S = computeNext(Point: Inst, Cur: S);
780 }
781 }
782
783 const SrcState &getStateBefore(const MCInst &Inst) const override {
784 return getState(Inst);
785 }
786};
787
788std::shared_ptr<SrcSafetyAnalysis>
789SrcSafetyAnalysis::create(BinaryFunction &BF,
790 MCPlusBuilder::AllocatorIdTy AllocId,
791 ArrayRef<MCPhysReg> RegsToTrackInstsFor) {
792 if (BF.hasCFG())
793 return std::make_shared<DataflowSrcSafetyAnalysis>(args&: BF, args&: AllocId,
794 args&: RegsToTrackInstsFor);
795 return std::make_shared<CFGUnawareSrcSafetyAnalysis>(args&: BF, args&: AllocId,
796 args&: RegsToTrackInstsFor);
797}
798
799/// A state representing which registers are safe to be used as the destination
800/// operand of an authentication instruction.
801///
802/// Similar to SrcState, it is the responsibility of the analysis to take
803/// register aliasing into account.
804///
805/// Depending on the implementation (such as whether FEAT_FPAC is implemented
806/// by an AArch64 CPU or not), it may be possible that an authentication
807/// instruction returns an invalid pointer on failure instead of terminating
808/// the program immediately (assuming the program will crash as soon as that
809/// pointer is dereferenced). Since few bits are usually allocated for the PAC
810/// field (such as less than 16 bits on a typical AArch64 system), an attacker
811/// can try every possible signature and guess the correct one if there is a
812/// gadget that tells whether the particular pointer has a correct signature
813/// (a so called "authentication oracle"). For that reason, it should be
814/// impossible for an attacker to test if a pointer is correctly signed -
815/// either the program should be terminated on authentication failure or
816/// the result of authentication should not be accessible to an attacker.
817///
818/// Considering the instructions in forward order as they are executed, a
819/// restricted set of operations can be allowed on any register containing a
820/// value derived from the result of an authentication instruction until that
821/// value is checked not to contain the result of a failed authentication.
822/// In DstSafetyAnalysis, these rules are adapted, so that the safety property
823/// for a register is computed by iterating the instructions in backward order.
824/// Then the resulting properties are used at authentication instruction sites
825/// to check output registers and report the particular instruction if it writes
826/// to an unsafe register.
827///
828/// Another approach would be to simulate the above rules as-is, iterating over
829/// the instructions in forward direction. To make it possible to report the
830/// particular instructions as oracles, this would probably require tracking
831/// references to these instructions for each register currently containing
832/// sensitive data.
833///
834/// In DstSafetyAnalysis, the source register Xn of an instruction Inst is safe
835/// if at least one of the following is true:
836/// * Inst checks if Xn contains the result of a successful authentication and
837/// terminates the program on failure. Note that Inst can either naturally
838/// dereference Xn (load, branch, return, etc. instructions) or be the first
839/// instruction of an explicit checking sequence.
840/// * Inst performs safe address arithmetic AND both source and result
841/// registers, as well as any temporary registers, must be safe after
842/// execution of Inst (temporaries are not used on AArch64 and thus not
843/// currently supported/allowed).
844/// See MCPlusBuilder::analyzeAddressArithmeticsForPtrAuth for the details.
845/// * Inst fully overwrites Xn with a constant.
846struct DstState {
847 /// The set of registers whose values cannot be inspected by an attacker in
848 /// a way usable as an authentication oracle. The results of authentication
849 /// instructions should only be written to such registers.
850 BitVector CannotEscapeUnchecked;
851
852 /// A vector of sets, only used on the second analysis run.
853 /// Each element in this vector represents one of the tracked registers.
854 /// For each such register we track the set of first instructions that leak
855 /// the authenticated pointer before it was checked. This is intended to
856 /// provide clues on which instruction made the particular register unsafe.
857 ///
858 /// Please note that the mapping from MCPhysReg values to indexes in this
859 /// vector is provided by RegsToTrackInstsFor field of DstSafetyAnalysis.
860 std::vector<SetOfRelatedInsts> FirstInstLeakingReg;
861
862 /// Constructs an empty state.
863 DstState() {}
864
865 DstState(unsigned NumRegs, unsigned NumRegsToTrack)
866 : CannotEscapeUnchecked(NumRegs), FirstInstLeakingReg(NumRegsToTrack) {}
867
868 DstState &merge(const DstState &StateIn) {
869 if (StateIn.empty())
870 return *this;
871 if (empty())
872 return (*this = StateIn);
873
874 CannotEscapeUnchecked &= StateIn.CannotEscapeUnchecked;
875 for (unsigned I = 0; I < FirstInstLeakingReg.size(); ++I)
876 for (const MCInst *J : StateIn.FirstInstLeakingReg[I])
877 FirstInstLeakingReg[I].insert(Ptr: J);
878 return *this;
879 }
880
881 /// Returns true if this object does not store state of any registers -
882 /// neither safe, nor unsafe ones.
883 bool empty() const { return CannotEscapeUnchecked.empty(); }
884
885 bool operator==(const DstState &RHS) const {
886 return CannotEscapeUnchecked == RHS.CannotEscapeUnchecked &&
887 FirstInstLeakingReg == RHS.FirstInstLeakingReg;
888 }
889 bool operator!=(const DstState &RHS) const { return !((*this) == RHS); }
890};
891
892static raw_ostream &operator<<(raw_ostream &OS, const DstState &S) {
893 OS << "dst-state<";
894 if (S.empty()) {
895 OS << "empty";
896 } else {
897 OS << "CannotEscapeUnchecked: " << S.CannotEscapeUnchecked << ", ";
898 printInstsShort(OS, Insts: S.FirstInstLeakingReg);
899 }
900 OS << ">";
901 return OS;
902}
903
904class DstStatePrinter {
905public:
906 void print(raw_ostream &OS, const DstState &S) const;
907 explicit DstStatePrinter(const BinaryContext &BC) : BC(BC) {}
908
909private:
910 const BinaryContext &BC;
911};
912
913void DstStatePrinter::print(raw_ostream &OS, const DstState &S) const {
914 RegStatePrinter RegStatePrinter(BC);
915 OS << "dst-state<";
916 if (S.empty()) {
917 assert(S.CannotEscapeUnchecked.empty());
918 assert(S.FirstInstLeakingReg.empty());
919 OS << "empty";
920 } else {
921 OS << "CannotEscapeUnchecked: ";
922 RegStatePrinter.print(OS, State: S.CannotEscapeUnchecked);
923 OS << ", ";
924 printInstsShort(OS, Insts: S.FirstInstLeakingReg);
925 }
926 OS << ">";
927}
928
929/// Computes which registers are safe to be written to by auth instructions.
930///
931/// This is the base class for two implementations: a dataflow-based analysis
932/// which is intended to be used for most functions and a simplified CFG-unaware
933/// version for functions without reconstructed CFG.
934class DstSafetyAnalysis {
935public:
936 DstSafetyAnalysis(BinaryFunction &BF, ArrayRef<MCPhysReg> RegsToTrackInstsFor)
937 : BC(BF.getBinaryContext()), NumRegs(BC.MRI->getNumRegs()),
938 RegsToTrackInstsFor(RegsToTrackInstsFor) {}
939
940 virtual ~DstSafetyAnalysis() {}
941
942 static std::shared_ptr<DstSafetyAnalysis>
943 create(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId,
944 ArrayRef<MCPhysReg> RegsToTrackInstsFor);
945
946 virtual void run() = 0;
947 virtual const DstState &getStateAfter(const MCInst &Inst) const = 0;
948
949protected:
950 BinaryContext &BC;
951 const unsigned NumRegs;
952
953 const TrackedRegisters RegsToTrackInstsFor;
954
955 /// Stores information about the detected instruction sequences emitted to
956 /// check an authenticated pointer. Specifically, if such sequence is detected
957 /// in a basic block, it maps the first instruction of that sequence to the
958 /// register being checked.
959 ///
960 /// As the detection of such sequences requires iterating over the adjacent
961 /// instructions, it should be done before calling computeNext(), which
962 /// operates on separate instructions.
963 DenseMap<const MCInst *, MCPhysReg> RegCheckedAt;
964
965 SetOfRelatedInsts &firstLeakingInsts(DstState &S, MCPhysReg Reg) const {
966 unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
967 return S.FirstInstLeakingReg[Index];
968 }
969 const SetOfRelatedInsts &firstLeakingInsts(const DstState &S,
970 MCPhysReg Reg) const {
971 unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
972 return S.FirstInstLeakingReg[Index];
973 }
974
975 /// Creates a state with all registers marked unsafe (not to be confused
976 /// with empty state).
977 DstState createUnsafeState() {
978 return DstState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
979 }
980
981 /// Returns the set of registers that can be leaked by this instruction.
982 /// A register is considered leaked if it has any intersection with any
983 /// register read by Inst. This is similar to how the set of clobbered
984 /// registers is computed, but taking input operands instead of outputs.
985 BitVector getLeakedRegs(const MCInst &Inst) const {
986 BitVector Leaked(NumRegs);
987
988 // Assume a call can read all registers.
989 if (BC.MIB->isCall(Inst)) {
990 Leaked.set();
991 return Leaked;
992 }
993
994 // Compute the set of registers overlapping with any register used by
995 // this instruction.
996
997 const MCInstrDesc &Desc = BC.MII->get(Opcode: Inst.getOpcode());
998
999 for (MCPhysReg Reg : Desc.implicit_uses())
1000 Leaked |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/false);
1001
1002 for (const MCOperand &Op : BC.MIB->useOperands(Inst)) {
1003 if (Op.isReg())
1004 Leaked |= BC.MIB->getAliases(Reg: Op.getReg(), /*OnlySmaller=*/false);
1005 }
1006
1007 return Leaked;
1008 }
1009
1010 SmallVector<MCPhysReg> getRegsMadeProtected(const MCInst &Inst,
1011 const BitVector &LeakedRegs,
1012 const DstState &Cur) const {
1013 SmallVector<MCPhysReg> Regs;
1014
1015 // A pointer can be checked, or
1016 if (auto CheckedReg =
1017 BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/true))
1018 Regs.push_back(Elt: *CheckedReg);
1019 if (RegCheckedAt.contains(Val: &Inst))
1020 Regs.push_back(Elt: RegCheckedAt.at(Val: &Inst));
1021
1022 // ... it can be used as a branch target, or
1023 if (BC.MIB->isIndirectBranch(Inst) || BC.MIB->isIndirectCall(Inst)) {
1024 bool IsAuthenticated;
1025 MCPhysReg BranchDestReg =
1026 BC.MIB->getRegUsedAsIndirectBranchDest(Inst, IsAuthenticatedInternally&: IsAuthenticated);
1027 assert(BranchDestReg != BC.MIB->getNoRegister());
1028 if (!IsAuthenticated)
1029 Regs.push_back(Elt: BranchDestReg);
1030 }
1031
1032 // ... it can be used as a return target, or
1033 if (BC.MIB->isReturn(Inst)) {
1034 bool IsAuthenticated = false;
1035 std::optional<MCPhysReg> RetReg =
1036 BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticatedInternally&: IsAuthenticated);
1037 if (RetReg && !IsAuthenticated)
1038 Regs.push_back(Elt: *RetReg);
1039 }
1040
1041 // ... an address can be updated in a safe manner, or
1042 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst)) {
1043 MCPhysReg DstReg, SrcReg;
1044 std::tie(args&: DstReg, args&: SrcReg) = *DstAndSrc;
1045 // Note that *all* registers containing the derived values must be safe,
1046 // both source and destination ones. No temporaries are supported at now.
1047 if (Cur.CannotEscapeUnchecked[SrcReg] &&
1048 Cur.CannotEscapeUnchecked[DstReg])
1049 Regs.push_back(Elt: SrcReg);
1050 }
1051
1052 // ... the register can be overwritten in whole with a constant: for that
1053 // purpose, look for the instructions with no register inputs (neither
1054 // explicit nor implicit ones) and no side effects (to rule out reading
1055 // not modelled locations).
1056 const MCInstrDesc &Desc = BC.MII->get(Opcode: Inst.getOpcode());
1057 bool HasExplicitSrcRegs = llvm::any_of(Range: BC.MIB->useOperands(Inst),
1058 P: [](auto Op) { return Op.isReg(); });
1059 if (!Desc.hasUnmodeledSideEffects() && !HasExplicitSrcRegs &&
1060 Desc.implicit_uses().empty()) {
1061 for (const MCOperand &Def : BC.MIB->defOperands(Inst))
1062 Regs.push_back(Elt: Def.getReg());
1063 }
1064
1065 return Regs;
1066 }
1067
1068 DstState computeNext(const MCInst &Point, const DstState &Cur) {
1069 if (BC.MIB->isCFI(Inst: Point))
1070 return Cur;
1071
1072 DstStatePrinter P(BC);
1073 LLVM_DEBUG({
1074 dbgs() << " DstSafetyAnalysis::ComputeNext(";
1075 BC.InstPrinter->printInst(&Point, 0, "", *BC.STI, dbgs());
1076 dbgs() << ", ";
1077 P.print(dbgs(), Cur);
1078 dbgs() << ")\n";
1079 });
1080
1081 // If this instruction is reachable by the analysis, a non-empty state will
1082 // be propagated to it sooner or later. Until then, skip computeNext().
1083 if (Cur.empty()) {
1084 LLVM_DEBUG(
1085 { dbgs() << "Skipping computeNext(Point, Cur) as Cur is empty.\n"; });
1086 return DstState();
1087 }
1088
1089 // First, compute various properties of the instruction, taking the state
1090 // after its execution into account, if necessary.
1091
1092 BitVector LeakedRegs = getLeakedRegs(Inst: Point);
1093 SmallVector<MCPhysReg> NewProtectedRegs =
1094 getRegsMadeProtected(Inst: Point, LeakedRegs, Cur);
1095
1096 // Then, compute the state before this instruction is executed.
1097 DstState Next = Cur;
1098
1099 Next.CannotEscapeUnchecked.reset(RHS: LeakedRegs);
1100 for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters()) {
1101 if (LeakedRegs[Reg])
1102 firstLeakingInsts(S&: Next, Reg) = {&Point};
1103 }
1104
1105 BitVector NewProtectedSubregs(NumRegs);
1106 for (MCPhysReg Reg : NewProtectedRegs)
1107 NewProtectedSubregs |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/true);
1108 Next.CannotEscapeUnchecked |= NewProtectedSubregs;
1109 for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters()) {
1110 if (NewProtectedSubregs[Reg])
1111 firstLeakingInsts(S&: Next, Reg).clear();
1112 }
1113
1114 LLVM_DEBUG({
1115 dbgs() << " .. result: (";
1116 P.print(dbgs(), Next);
1117 dbgs() << ")\n";
1118 });
1119
1120 return Next;
1121 }
1122
1123public:
1124 std::vector<MCInstReference> getLeakingInsts(const MCInst &Inst,
1125 BinaryFunction &BF,
1126 MCPhysReg LeakedReg) const {
1127 const DstState &S = getStateAfter(Inst);
1128
1129 std::vector<MCInstReference> Result;
1130 for (const MCInst *Inst : firstLeakingInsts(S, Reg: LeakedReg)) {
1131 MCInstReference Ref = MCInstReference::get(Inst, BF);
1132 assert(Ref && "Expected Inst to be found");
1133 Result.push_back(x: Ref);
1134 }
1135 return Result;
1136 }
1137};
1138
1139class DataflowDstSafetyAnalysis
1140 : public DstSafetyAnalysis,
1141 public DataflowAnalysis<DataflowDstSafetyAnalysis, DstState,
1142 /*Backward=*/true, DstStatePrinter> {
1143 using DFParent = DataflowAnalysis<DataflowDstSafetyAnalysis, DstState, true,
1144 DstStatePrinter>;
1145 friend DFParent;
1146
1147 using DstSafetyAnalysis::BC;
1148 using DstSafetyAnalysis::computeNext;
1149
1150public:
1151 DataflowDstSafetyAnalysis(BinaryFunction &BF,
1152 MCPlusBuilder::AllocatorIdTy AllocId,
1153 ArrayRef<MCPhysReg> RegsToTrackInstsFor)
1154 : DstSafetyAnalysis(BF, RegsToTrackInstsFor), DFParent(BF, AllocId) {}
1155
1156 const DstState &getStateAfter(const MCInst &Inst) const override {
1157 // The dataflow analysis base class iterates backwards over the
1158 // instructions, thus "after" vs. "before" difference.
1159 return DFParent::getStateBefore(Point: Inst).get();
1160 }
1161
1162 void run() override {
1163 for (BinaryBasicBlock &BB : Func) {
1164 if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) {
1165 LLVM_DEBUG({
1166 dbgs() << "Found pointer checking sequence in " << BB.getName()
1167 << ":\n";
1168 traceReg(BC, "Checked register", CheckerInfo->first);
1169 traceInst(BC, "First instruction", *CheckerInfo->second);
1170 });
1171 RegCheckedAt[CheckerInfo->second] = CheckerInfo->first;
1172 }
1173 }
1174 DFParent::run();
1175 }
1176
1177protected:
1178 void preflight() {}
1179
1180 DstState getStartingStateAtBB(const BinaryBasicBlock &BB) {
1181 // In general, the initial state should be empty, not everything-is-unsafe,
1182 // to give a chance for some meaningful state to be propagated to BB from
1183 // an indirectly reachable "exit basic block" ending with a return or tail
1184 // call instruction.
1185 //
1186 // A basic block without any successors, on the other hand, can be
1187 // pessimistically initialized to everything-is-unsafe: this will naturally
1188 // handle both return and tail call instructions and is harmless for
1189 // internal indirect branch instructions (such as computed gotos).
1190 if (BB.succ_empty())
1191 return createUnsafeState();
1192
1193 return DstState();
1194 }
1195
1196 DstState getStartingStateAtPoint(const MCInst &Point) { return DstState(); }
1197
1198 void doConfluence(DstState &StateOut, const DstState &StateIn) {
1199 DstStatePrinter P(BC);
1200 LLVM_DEBUG({
1201 dbgs() << " DataflowDstSafetyAnalysis::Confluence(\n";
1202 dbgs() << " State 1: ";
1203 P.print(dbgs(), StateOut);
1204 dbgs() << "\n";
1205 dbgs() << " State 2: ";
1206 P.print(dbgs(), StateIn);
1207 dbgs() << ")\n";
1208 });
1209
1210 StateOut.merge(StateIn);
1211
1212 LLVM_DEBUG({
1213 dbgs() << " merged state: ";
1214 P.print(dbgs(), StateOut);
1215 dbgs() << "\n";
1216 });
1217 }
1218
1219 StringRef getAnnotationName() const { return "DataflowDstSafetyAnalysis"; }
1220};
1221
1222class CFGUnawareDstSafetyAnalysis : public DstSafetyAnalysis,
1223 public CFGUnawareAnalysis<DstState> {
1224 using DstSafetyAnalysis::BC;
1225 BinaryFunction &BF;
1226
1227public:
1228 CFGUnawareDstSafetyAnalysis(BinaryFunction &BF,
1229 MCPlusBuilder::AllocatorIdTy AllocId,
1230 ArrayRef<MCPhysReg> RegsToTrackInstsFor)
1231 : DstSafetyAnalysis(BF, RegsToTrackInstsFor),
1232 CFGUnawareAnalysis(BF, AllocId, "CFGUnawareDstSafetyAnalysis"), BF(BF) {
1233 }
1234
1235 void run() override {
1236 DstState S = createUnsafeState();
1237 for (auto &I : llvm::reverse(C: BF.instrs())) {
1238 MCInst &Inst = I.second;
1239 if (BC.MIB->isCFI(Inst))
1240 continue;
1241
1242 // If Inst can change the control flow, we cannot be sure that the next
1243 // instruction (to be executed in analyzed program) is the one processed
1244 // on the previous iteration, thus pessimistically reset S before
1245 // starting to analyze Inst.
1246 if (BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst) ||
1247 BC.MIB->isReturn(Inst)) {
1248 LLVM_DEBUG({ traceInst(BC, "Control flow instruction", Inst); });
1249 S = createUnsafeState();
1250 }
1251
1252 // Attach the state *after* this instruction executes.
1253 setState(Inst, S);
1254
1255 // Compute the next state.
1256 S = computeNext(Point: Inst, Cur: S);
1257 }
1258 }
1259
1260 const DstState &getStateAfter(const MCInst &Inst) const override {
1261 return getState(Inst);
1262 }
1263};
1264
1265std::shared_ptr<DstSafetyAnalysis>
1266DstSafetyAnalysis::create(BinaryFunction &BF,
1267 MCPlusBuilder::AllocatorIdTy AllocId,
1268 ArrayRef<MCPhysReg> RegsToTrackInstsFor) {
1269 if (BF.hasCFG())
1270 return std::make_shared<DataflowDstSafetyAnalysis>(args&: BF, args&: AllocId,
1271 args&: RegsToTrackInstsFor);
1272 return std::make_shared<CFGUnawareDstSafetyAnalysis>(args&: BF, args&: AllocId,
1273 args&: RegsToTrackInstsFor);
1274}
1275
1276// This function could return PartialReport<T>, but currently T is always
1277// MCPhysReg, even though it is an implementation detail.
1278static PartialReport<MCPhysReg> make_generic_report(MCInstReference Location,
1279 StringRef Text) {
1280 auto Report = std::make_shared<GenericDiagnostic>(args&: Location, args&: Text);
1281 return PartialReport<MCPhysReg>(Report, std::nullopt);
1282}
1283
1284template <typename T>
1285static PartialReport<T> make_gadget_report(const GadgetKind &Kind,
1286 MCInstReference Location,
1287 T RequestedDetails) {
1288 auto Report = std::make_shared<GadgetDiagnostic>(args: Kind, args&: Location);
1289 return PartialReport<T>(Report, RequestedDetails);
1290}
1291
1292static std::optional<PartialReport<MCPhysReg>>
1293shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
1294 const SrcState &S) {
1295 static const GadgetKind RetKind("non-protected ret found");
1296 if (!BC.MIB->isReturn(Inst))
1297 return std::nullopt;
1298
1299 bool IsAuthenticated = false;
1300 std::optional<MCPhysReg> RetReg =
1301 BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticatedInternally&: IsAuthenticated);
1302 if (!RetReg) {
1303 return make_generic_report(
1304 Location: Inst, Text: "Warning: pac-ret analysis could not analyze this return "
1305 "instruction");
1306 }
1307 if (IsAuthenticated)
1308 return std::nullopt;
1309
1310 LLVM_DEBUG({
1311 traceInst(BC, "Found RET inst", Inst);
1312 traceReg(BC, "RetReg", *RetReg);
1313 traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs);
1314 });
1315
1316 if (S.SafeToDerefRegs[*RetReg])
1317 return std::nullopt;
1318
1319 return make_gadget_report(Kind: RetKind, Location: Inst, RequestedDetails: *RetReg);
1320}
1321
1322/// While BOLT already marks some of the branch instructions as tail calls,
1323/// this function tries to detect less obvious cases, assuming false positives
1324/// are acceptable as long as there are not too many of them.
1325///
1326/// It is possible that not all the instructions classified as tail calls by
1327/// this function are safe to be considered as such for the purpose of code
1328/// transformations performed by BOLT. The intention of this function is to
1329/// spot some of actually missed tail calls (and likely a number of unrelated
1330/// indirect branch instructions) as long as this doesn't increase the amount
1331/// of false positive reports unacceptably.
1332static bool shouldAnalyzeTailCallInst(const BinaryContext &BC,
1333 const BinaryFunction &BF,
1334 const MCInstReference &Inst) {
1335 // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
1336 // (such as isBranch at the time of writing this comment), some don't (such
1337 // as isCall). For that reason, call MCInstrDesc's methods explicitly when
1338 // it is important.
1339 const MCInstrDesc &Desc =
1340 BC.MII->get(Opcode: static_cast<const MCInst &>(Inst).getOpcode());
1341 // Tail call should be a branch (but not necessarily an indirect one).
1342 if (!Desc.isBranch())
1343 return false;
1344
1345 // Always analyze the branches already marked as tail calls by BOLT.
1346 if (BC.MIB->isTailCall(Inst))
1347 return true;
1348
1349 // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
1350 // below is a simplified condition from BinaryContext::printInstruction.
1351 bool IsUnknownControlFlow =
1352 BC.MIB->isIndirectBranch(Inst) && !BC.MIB->getJumpTable(Inst);
1353
1354 if (BF.hasCFG() && IsUnknownControlFlow)
1355 return true;
1356
1357 return false;
1358}
1359
1360static std::optional<PartialReport<MCPhysReg>>
1361shouldReportUnsafeTailCall(const BinaryContext &BC, const BinaryFunction &BF,
1362 const MCInstReference &Inst, const SrcState &S) {
1363 static const GadgetKind UntrustedLRKind(
1364 "untrusted link register found before tail call");
1365
1366 if (!shouldAnalyzeTailCallInst(BC, BF, Inst))
1367 return std::nullopt;
1368
1369 // Not only the set of registers returned by getTrustedLiveInRegs() can be
1370 // seen as a reasonable target-independent _approximation_ of "the LR", these
1371 // are *exactly* those registers used by SrcSafetyAnalysis to initialize the
1372 // set of trusted registers on function entry.
1373 // Thus, this function basically checks that the precondition expected to be
1374 // imposed by a function call instruction (which is hardcoded into the target-
1375 // specific getTrustedLiveInRegs() function) is also respected on tail calls.
1376 SmallVector<MCPhysReg> RegsToCheck = BC.MIB->getTrustedLiveInRegs();
1377 LLVM_DEBUG({
1378 traceInst(BC, "Found tail call inst", Inst);
1379 traceRegMask(BC, "Trusted regs", S.TrustedRegs);
1380 });
1381
1382 // In musl on AArch64, the _start function sets LR to zero and calls the next
1383 // stage initialization function at the end, something along these lines:
1384 //
1385 // _start:
1386 // mov x30, #0
1387 // ; ... other initialization ...
1388 // b _start_c ; performs "exit" system call at some point
1389 //
1390 // As this would produce a false positive for every executable linked with
1391 // such libc, ignore tail calls performed by ELF entry function.
1392 if (BC.StartFunctionAddress &&
1393 *BC.StartFunctionAddress == Inst.getFunction()->getAddress()) {
1394 LLVM_DEBUG({ dbgs() << " Skipping tail call in ELF entry function.\n"; });
1395 return std::nullopt;
1396 }
1397
1398 // Returns at most one report per instruction - this is probably OK...
1399 for (auto Reg : RegsToCheck)
1400 if (!S.TrustedRegs[Reg])
1401 return make_gadget_report(Kind: UntrustedLRKind, Location: Inst, RequestedDetails: Reg);
1402
1403 return std::nullopt;
1404}
1405
1406static std::optional<PartialReport<MCPhysReg>>
1407shouldReportCallGadget(const BinaryContext &BC, const MCInstReference &Inst,
1408 const SrcState &S) {
1409 static const GadgetKind CallKind("non-protected call found");
1410 if (!BC.MIB->isIndirectCall(Inst) && !BC.MIB->isIndirectBranch(Inst))
1411 return std::nullopt;
1412
1413 bool IsAuthenticated = false;
1414 MCPhysReg DestReg =
1415 BC.MIB->getRegUsedAsIndirectBranchDest(Inst, IsAuthenticatedInternally&: IsAuthenticated);
1416 if (IsAuthenticated)
1417 return std::nullopt;
1418
1419 assert(DestReg != BC.MIB->getNoRegister() && "Valid register expected");
1420 LLVM_DEBUG({
1421 traceInst(BC, "Found call inst", Inst);
1422 traceReg(BC, "Call destination reg", DestReg);
1423 traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs);
1424 });
1425 if (S.SafeToDerefRegs[DestReg])
1426 return std::nullopt;
1427
1428 return make_gadget_report(Kind: CallKind, Location: Inst, RequestedDetails: DestReg);
1429}
1430
1431static std::optional<PartialReport<MCPhysReg>>
1432shouldReportSigningOracle(const BinaryContext &BC, const MCInstReference &Inst,
1433 const SrcState &S) {
1434 static const GadgetKind SigningOracleKind("signing oracle found");
1435
1436 std::optional<MCPhysReg> SignedReg = BC.MIB->getSignedReg(Inst);
1437 if (!SignedReg)
1438 return std::nullopt;
1439
1440 LLVM_DEBUG({
1441 traceInst(BC, "Found sign inst", Inst);
1442 traceReg(BC, "Signed reg", *SignedReg);
1443 traceRegMask(BC, "TrustedRegs", S.TrustedRegs);
1444 });
1445 if (S.TrustedRegs[*SignedReg])
1446 return std::nullopt;
1447
1448 return make_gadget_report(Kind: SigningOracleKind, Location: Inst, RequestedDetails: *SignedReg);
1449}
1450
1451static std::optional<PartialReport<MCPhysReg>>
1452shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst,
1453 const DstState &S) {
1454 static const GadgetKind AuthOracleKind("authentication oracle found");
1455
1456 bool IsChecked = false;
1457 std::optional<MCPhysReg> AuthReg =
1458 BC.MIB->getWrittenAuthenticatedReg(Inst, IsChecked);
1459 if (!AuthReg || IsChecked)
1460 return std::nullopt;
1461
1462 LLVM_DEBUG({
1463 traceInst(BC, "Found auth inst", Inst);
1464 traceReg(BC, "Authenticated reg", *AuthReg);
1465 });
1466
1467 if (S.empty()) {
1468 LLVM_DEBUG({ dbgs() << " DstState is empty!\n"; });
1469 return make_generic_report(
1470 Location: Inst, Text: "Warning: no state computed for an authentication instruction "
1471 "(possibly unreachable)");
1472 }
1473
1474 LLVM_DEBUG(
1475 { traceRegMask(BC, "safe output registers", S.CannotEscapeUnchecked); });
1476 if (S.CannotEscapeUnchecked[*AuthReg])
1477 return std::nullopt;
1478
1479 return make_gadget_report(Kind: AuthOracleKind, Location: Inst, RequestedDetails: *AuthReg);
1480}
1481
1482static SmallVector<MCPhysReg>
1483collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) {
1484 SmallSet<MCPhysReg, 4> RegsToTrack;
1485 for (auto Report : Reports)
1486 if (Report.RequestedDetails)
1487 RegsToTrack.insert(V: *Report.RequestedDetails);
1488
1489 return SmallVector<MCPhysReg>(RegsToTrack.begin(), RegsToTrack.end());
1490}
1491
1492void FunctionAnalysisContext::findUnsafeUses(
1493 SmallVector<PartialReport<MCPhysReg>> &Reports) {
1494 auto Analysis = SrcSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: {});
1495 LLVM_DEBUG({ dbgs() << "Running src register safety analysis...\n"; });
1496 Analysis->run();
1497 LLVM_DEBUG({
1498 dbgs() << "After src register safety analysis:\n";
1499 BF.dump();
1500 });
1501
1502 bool UnreachableBBReported = false;
1503 if (BF.hasCFG()) {
1504 // Warn on basic blocks being unreachable according to BOLT (at most once
1505 // per BinaryFunction), as this likely means the CFG reconstructed by BOLT
1506 // is imprecise. A basic block can be
1507 // * reachable from an entry basic block - a hopefully correct non-empty
1508 // state is propagated to that basic block sooner or later. All basic
1509 // blocks are expected to belong to this category under normal conditions.
1510 // * reachable from a "directly unreachable" BB (a basic block that has no
1511 // direct predecessors and this is not because it is an entry BB) - *some*
1512 // non-empty state is propagated to this basic block sooner or later, as
1513 // the initial state of directly unreachable basic blocks is
1514 // pessimistically initialized to "all registers are unsafe"
1515 // - a warning can be printed for the "directly unreachable" basic block
1516 // * neither reachable from an entry nor from a "directly unreachable" BB
1517 // (such as if this BB is in an isolated loop of basic blocks) - the final
1518 // state is computed to be empty for this basic block
1519 // - a warning can be printed for this basic block
1520 for (BinaryBasicBlock &BB : BF) {
1521 MCInst *FirstInst = BB.getFirstNonPseudoInstr();
1522 // Skip empty basic block early for simplicity.
1523 if (!FirstInst)
1524 continue;
1525
1526 bool IsDirectlyUnreachable = BB.pred_empty() && !BB.isEntryPoint();
1527 bool HasNoStateComputed = Analysis->getStateBefore(Inst: *FirstInst).empty();
1528 if (!IsDirectlyUnreachable && !HasNoStateComputed)
1529 continue;
1530
1531 // Arbitrarily attach the report to the first instruction of BB.
1532 // This is printed as "[message] in function [name], basic block ...,
1533 // at address ..." when the issue is reported to the user.
1534 Reports.push_back(Elt: make_generic_report(
1535 Location: MCInstReference::get(Inst: FirstInst, BF),
1536 Text: "Warning: possibly imprecise CFG, the analysis quality may be "
1537 "degraded in this function. According to BOLT, unreachable code is "
1538 "found" /* in function [name]... */));
1539 UnreachableBBReported = true;
1540 break; // One warning per function.
1541 }
1542 }
1543 // FIXME: Warn the user about imprecise analysis when the function has no CFG
1544 // information at all.
1545
1546 iterateOverInstrs(BF, Fn: [&](MCInstReference Inst) {
1547 if (BC.MIB->isCFI(Inst))
1548 return;
1549
1550 const SrcState &S = Analysis->getStateBefore(Inst);
1551 if (S.empty()) {
1552 LLVM_DEBUG(
1553 { traceInst(BC, "Instruction has no state, skipping", Inst); });
1554 assert(UnreachableBBReported && "Should be reported at least once");
1555 (void)UnreachableBBReported;
1556 return;
1557 }
1558
1559 if (auto Report = shouldReportReturnGadget(BC, Inst, S))
1560 Reports.push_back(Elt: *Report);
1561
1562 if (PacRetGadgetsOnly)
1563 return;
1564
1565 if (auto Report = shouldReportUnsafeTailCall(BC, BF, Inst, S))
1566 Reports.push_back(Elt: *Report);
1567
1568 if (auto Report = shouldReportCallGadget(BC, Inst, S))
1569 Reports.push_back(Elt: *Report);
1570 if (auto Report = shouldReportSigningOracle(BC, Inst, S))
1571 Reports.push_back(Elt: *Report);
1572 });
1573}
1574
1575void FunctionAnalysisContext::augmentUnsafeUseReports(
1576 ArrayRef<PartialReport<MCPhysReg>> Reports) {
1577 SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports);
1578 // Re-compute the analysis with register tracking.
1579 auto Analysis = SrcSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: RegsToTrack);
1580 LLVM_DEBUG(
1581 { dbgs() << "\nRunning detailed src register safety analysis...\n"; });
1582 Analysis->run();
1583 LLVM_DEBUG({
1584 dbgs() << "After detailed src register safety analysis:\n";
1585 BF.dump();
1586 });
1587
1588 // Augment gadget reports.
1589 for (auto &Report : Reports) {
1590 MCInstReference Location = Report.Issue->Location;
1591 LLVM_DEBUG({ traceInst(BC, "Attaching clobbering info to", Location); });
1592 assert(Report.RequestedDetails &&
1593 "Should be removed by handleSimpleReports");
1594 auto DetailedInfo =
1595 std::make_shared<ClobberingInfo>(args: Analysis->getLastClobberingInsts(
1596 Inst: Location, BF, ClobberedReg: *Report.RequestedDetails));
1597 Result.Diagnostics.emplace_back(args: Report.Issue, args&: DetailedInfo);
1598 }
1599}
1600
1601void FunctionAnalysisContext::findUnsafeDefs(
1602 SmallVector<PartialReport<MCPhysReg>> &Reports) {
1603 if (PacRetGadgetsOnly)
1604 return;
1605
1606 auto Analysis = DstSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: {});
1607 LLVM_DEBUG({ dbgs() << "Running dst register safety analysis...\n"; });
1608 Analysis->run();
1609 LLVM_DEBUG({
1610 dbgs() << "After dst register safety analysis:\n";
1611 BF.dump();
1612 });
1613
1614 iterateOverInstrs(BF, Fn: [&](MCInstReference Inst) {
1615 if (BC.MIB->isCFI(Inst))
1616 return;
1617
1618 const DstState &S = Analysis->getStateAfter(Inst);
1619
1620 if (auto Report = shouldReportAuthOracle(BC, Inst, S))
1621 Reports.push_back(Elt: *Report);
1622 });
1623}
1624
1625void FunctionAnalysisContext::augmentUnsafeDefReports(
1626 ArrayRef<PartialReport<MCPhysReg>> Reports) {
1627 SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports);
1628 // Re-compute the analysis with register tracking.
1629 auto Analysis = DstSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: RegsToTrack);
1630 LLVM_DEBUG(
1631 { dbgs() << "\nRunning detailed dst register safety analysis...\n"; });
1632 Analysis->run();
1633 LLVM_DEBUG({
1634 dbgs() << "After detailed dst register safety analysis:\n";
1635 BF.dump();
1636 });
1637
1638 // Augment gadget reports.
1639 for (auto &Report : Reports) {
1640 MCInstReference Location = Report.Issue->Location;
1641 LLVM_DEBUG({ traceInst(BC, "Attaching leakage info to", Location); });
1642 assert(Report.RequestedDetails &&
1643 "Should be removed by handleSimpleReports");
1644 auto DetailedInfo = std::make_shared<LeakageInfo>(
1645 args: Analysis->getLeakingInsts(Inst: Location, BF, LeakedReg: *Report.RequestedDetails));
1646 Result.Diagnostics.emplace_back(args: Report.Issue, args&: DetailedInfo);
1647 }
1648}
1649
1650void FunctionAnalysisContext::handleSimpleReports(
1651 SmallVector<PartialReport<MCPhysReg>> &Reports) {
1652 // Before re-running the detailed analysis, process the reports which do not
1653 // need any additional details to be attached.
1654 for (auto &Report : Reports) {
1655 if (!Report.RequestedDetails)
1656 Result.Diagnostics.emplace_back(args&: Report.Issue, args: nullptr);
1657 }
1658 llvm::erase_if(C&: Reports, P: [](const auto &R) { return !R.RequestedDetails; });
1659}
1660
1661void FunctionAnalysisContext::run() {
1662 LLVM_DEBUG({
1663 dbgs() << "Analyzing function " << BF.getPrintName()
1664 << ", AllocatorId = " << AllocatorId << "\n";
1665 BF.dump();
1666 });
1667
1668 SmallVector<PartialReport<MCPhysReg>> UnsafeUses;
1669 findUnsafeUses(Reports&: UnsafeUses);
1670 handleSimpleReports(Reports&: UnsafeUses);
1671 if (!UnsafeUses.empty())
1672 augmentUnsafeUseReports(Reports: UnsafeUses);
1673
1674 SmallVector<PartialReport<MCPhysReg>> UnsafeDefs;
1675 findUnsafeDefs(Reports&: UnsafeDefs);
1676 handleSimpleReports(Reports&: UnsafeDefs);
1677 if (!UnsafeDefs.empty())
1678 augmentUnsafeDefReports(Reports: UnsafeDefs);
1679}
1680
1681void Analysis::runOnFunction(BinaryFunction &BF,
1682 MCPlusBuilder::AllocatorIdTy AllocatorId) {
1683 FunctionAnalysisContext FA(BF, AllocatorId, PacRetGadgetsOnly);
1684 FA.run();
1685
1686 const FunctionAnalysisResult &FAR = FA.getResult();
1687 if (FAR.Diagnostics.empty())
1688 return;
1689
1690 // `runOnFunction` is typically getting called from multiple threads in
1691 // parallel. Therefore, use a lock to avoid data races when storing the
1692 // result of the analysis in the `AnalysisResults` map.
1693 {
1694 std::lock_guard<std::mutex> Lock(AnalysisResultsMutex);
1695 AnalysisResults[&BF] = FAR;
1696 }
1697}
1698
1699static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
1700 size_t StartIndex = 0, size_t EndIndex = -1) {
1701 if (EndIndex == (size_t)-1)
1702 EndIndex = BB->size() - 1;
1703 const BinaryFunction *BF = BB->getFunction();
1704 for (unsigned I = StartIndex; I <= EndIndex; ++I) {
1705 // FIXME: this assumes all instructions are 4 bytes in size. This is true
1706 // for AArch64, but it might be good to extract this function so it can be
1707 // used elsewhere and for other targets too.
1708 uint64_t Address = BB->getOffset() + BF->getAddress() + 4 * I;
1709 const MCInst &Inst = BB->getInstructionAtIndex(Index: I);
1710 if (BC.MIB->isCFI(Inst))
1711 continue;
1712 BC.printInstruction(OS&: outs(), Instruction: Inst, Offset: Address, Function: BF);
1713 }
1714}
1715
1716static void reportFoundGadgetInSingleBBSingleRelatedInst(
1717 raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst,
1718 const MCInstReference Location) {
1719 BinaryBasicBlock *BB = Location.getBasicBlock();
1720 assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent);
1721 assert(Location.ParentKind == MCInstReference::BasicBlockParent);
1722 MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef;
1723 if (BB == RelatedInstBB.BB) {
1724 OS << " This happens in the following basic block:\n";
1725 printBB(BC, BB);
1726 }
1727}
1728
1729void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
1730 StringRef IssueKind) const {
1731 BinaryFunction *BF = Location.getFunction();
1732 BinaryBasicBlock *BB = Location.getBasicBlock();
1733
1734 OS << "\nGS-PAUTH: " << IssueKind;
1735 OS << " in function " << BF->getPrintName();
1736 if (BB)
1737 OS << ", basic block " << BB->getName();
1738 OS << ", at address " << llvm::format(Fmt: "%x", Vals: Location.getAddress()) << "\n";
1739 OS << " The instruction is ";
1740 BC.printInstruction(OS, Instruction: Location, Offset: Location.getAddress(), Function: BF);
1741}
1742
1743void GadgetDiagnostic::generateReport(raw_ostream &OS,
1744 const BinaryContext &BC) const {
1745 printBasicInfo(OS, BC, IssueKind: Kind.getDescription());
1746}
1747
1748static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
1749 ArrayRef<MCInstReference> RelatedInstrs) {
1750 const BinaryFunction &BF = *Location.getFunction();
1751 const BinaryContext &BC = BF.getBinaryContext();
1752
1753 // Sort by address to ensure output is deterministic.
1754 SmallVector<MCInstReference> RI(RelatedInstrs);
1755 llvm::sort(C&: RI, Comp: [](const MCInstReference &A, const MCInstReference &B) {
1756 return A.getAddress() < B.getAddress();
1757 });
1758 for (unsigned I = 0; I < RI.size(); ++I) {
1759 MCInstReference InstRef = RI[I];
1760 OS << " " << (I + 1) << ". ";
1761 BC.printInstruction(OS, Instruction: InstRef, Offset: InstRef.getAddress(), Function: &BF);
1762 };
1763 if (RelatedInstrs.size() == 1) {
1764 const MCInstReference RelatedInst = RelatedInstrs[0];
1765 // Printing the details for the MCInstReference::FunctionParent case
1766 // is not implemented not to overcomplicate the code, as most functions
1767 // are expected to have CFG information.
1768 if (RelatedInst.ParentKind == MCInstReference::BasicBlockParent)
1769 reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst,
1770 Location);
1771 }
1772}
1773
1774void ClobberingInfo::print(raw_ostream &OS,
1775 const MCInstReference Location) const {
1776 OS << " The " << ClobberingInstrs.size()
1777 << " instructions that write to the affected registers after any "
1778 "authentication are:\n";
1779 printRelatedInstrs(OS, Location, RelatedInstrs: ClobberingInstrs);
1780}
1781
1782void LeakageInfo::print(raw_ostream &OS, const MCInstReference Location) const {
1783 OS << " The " << LeakingInstrs.size()
1784 << " instructions that leak the affected registers are:\n";
1785 printRelatedInstrs(OS, Location, RelatedInstrs: LeakingInstrs);
1786}
1787
1788void GenericDiagnostic::generateReport(raw_ostream &OS,
1789 const BinaryContext &BC) const {
1790 printBasicInfo(OS, BC, IssueKind: Text);
1791}
1792
1793Error Analysis::runOnFunctions(BinaryContext &BC) {
1794 ParallelUtilities::WorkFuncWithAllocTy WorkFun =
1795 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) {
1796 runOnFunction(BF, AllocatorId);
1797 };
1798
1799 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
1800 return false;
1801 };
1802
1803 ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
1804 BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFunction: WorkFun,
1805 SkipPredicate: SkipFunc, LogName: "PAuthGadgetScanner");
1806
1807 for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
1808 if (!AnalysisResults.count(x: BF))
1809 continue;
1810 for (const FinalReport &R : AnalysisResults[BF].Diagnostics) {
1811 R.Issue->generateReport(OS&: outs(), BC);
1812 if (R.Details)
1813 R.Details->print(OS&: outs(), Location: R.Issue->Location);
1814 }
1815 }
1816 return Error::success();
1817}
1818
1819} // namespace PAuthGadgetScanner
1820} // namespace bolt
1821} // namespace llvm
1822

source code of bolt/lib/Passes/PAuthGadgetScanner.cpp