1//===- bolt/Passes/PAuthGadgetScanner.cpp ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a pass that looks for any AArch64 return instructions
10// that may not be protected by PAuth authentication instructions when needed.
11//
12//===----------------------------------------------------------------------===//
13
14#include "bolt/Passes/PAuthGadgetScanner.h"
15#include "bolt/Core/ParallelUtilities.h"
16#include "bolt/Passes/DataflowAnalysis.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/Support/Format.h"
21#include <memory>
22
23#define DEBUG_TYPE "bolt-pauth-scanner"
24
25namespace llvm {
26namespace bolt {
27
28raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) {
29 OS << "MCInstBBRef<";
30 if (Ref.BB == nullptr)
31 OS << "BB:(null)";
32 else
33 OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex;
34 OS << ">";
35 return OS;
36}
37
38raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) {
39 OS << "MCInstBFRef<";
40 if (Ref.BF == nullptr)
41 OS << "BF:(null)";
42 else
43 OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset();
44 OS << ">";
45 return OS;
46}
47
48raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
49 switch (Ref.ParentKind) {
50 case MCInstReference::BasicBlockParent:
51 OS << Ref.U.BBRef;
52 return OS;
53 case MCInstReference::FunctionParent:
54 OS << Ref.U.BFRef;
55 return OS;
56 }
57 llvm_unreachable("");
58}
59
60namespace PAuthGadgetScanner {
61
62[[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
63 const MCInst &MI) {
64 dbgs() << " " << Label << ": ";
65 BC.printInstruction(OS&: dbgs(), Instruction: MI);
66}
67
68[[maybe_unused]] static void traceReg(const BinaryContext &BC, StringRef Label,
69 MCPhysReg Reg) {
70 dbgs() << " " << Label << ": ";
71 if (Reg == BC.MIB->getNoRegister())
72 dbgs() << "(none)";
73 else
74 dbgs() << BC.MRI->getName(RegNo: Reg);
75 dbgs() << "\n";
76}
77
78[[maybe_unused]] static void traceRegMask(const BinaryContext &BC,
79 StringRef Label, BitVector Mask) {
80 dbgs() << " " << Label << ": ";
81 RegStatePrinter(BC).print(OS&: dbgs(), State: Mask);
82 dbgs() << "\n";
83}
84
85// This class represents mapping from a set of arbitrary physical registers to
86// consecutive array indexes.
87class TrackedRegisters {
88 static constexpr uint16_t NoIndex = -1;
89 const std::vector<MCPhysReg> Registers;
90 std::vector<uint16_t> RegToIndexMapping;
91
92 static size_t getMappingSize(ArrayRef<MCPhysReg> RegsToTrack) {
93 if (RegsToTrack.empty())
94 return 0;
95 return 1 + *llvm::max_element(Range&: RegsToTrack);
96 }
97
98public:
99 TrackedRegisters(ArrayRef<MCPhysReg> RegsToTrack)
100 : Registers(RegsToTrack),
101 RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) {
102 for (unsigned I = 0; I < RegsToTrack.size(); ++I)
103 RegToIndexMapping[RegsToTrack[I]] = I;
104 }
105
106 ArrayRef<MCPhysReg> getRegisters() const { return Registers; }
107
108 size_t getNumTrackedRegisters() const { return Registers.size(); }
109
110 bool empty() const { return Registers.empty(); }
111
112 bool isTracked(MCPhysReg Reg) const {
113 bool IsTracked = (unsigned)Reg < RegToIndexMapping.size() &&
114 RegToIndexMapping[Reg] != NoIndex;
115 assert(IsTracked == llvm::is_contained(Registers, Reg));
116 return IsTracked;
117 }
118
119 unsigned getIndex(MCPhysReg Reg) const {
120 assert(isTracked(Reg) && "Register is not tracked");
121 return RegToIndexMapping[Reg];
122 }
123};
124
125// The security property that is checked is:
126// When a register is used as the address to jump to in a return instruction,
127// that register must be safe-to-dereference. It must either
128// (a) be safe-to-dereference at function entry and never be changed within this
129// function, i.e. have the same value as when the function started, or
130// (b) the last write to the register must be by an authentication instruction.
131
132// This property is checked by using dataflow analysis to keep track of which
133// registers have been written (def-ed), since last authenticated. For pac-ret,
134// any return instruction using a register which is not safe-to-dereference is
135// a gadget to be reported. For PAuthABI, probably at least any indirect control
136// flow using such a register should be reported.
137
138// Furthermore, when producing a diagnostic for a found non-pac-ret protected
139// return, the analysis also lists the last instructions that wrote to the
140// register used in the return instruction.
141// The total set of registers used in return instructions in a given function is
142// small. It almost always is just `X30`.
143// In order to reduce the memory consumption of storing this additional state
144// during the dataflow analysis, this is computed by running the dataflow
145// analysis twice:
146// 1. In the first run, the dataflow analysis only keeps track of the security
147// property: i.e. which registers have been overwritten since the last
148// time they've been authenticated.
149// 2. If the first run finds any return instructions using a register last
150// written by a non-authenticating instruction, the dataflow analysis will
151// be run a second time. The first run will return which registers are used
152// in the gadgets to be reported. This information is used in the second run
153// to also track which instructions last wrote to those registers.
154
155/// A state representing which registers are safe to use by an instruction
156/// at a given program point.
157///
158/// To simplify reasoning, let's stick with the following approach:
159/// * when state is updated by the data-flow analysis, the sub-, super- and
160/// overlapping registers are marked as needed
161/// * when the particular instruction is checked if it represents a gadget,
162/// the specific bit of BitVector should be usable to answer this.
163///
164/// For example, on AArch64:
165/// * An AUTIZA X0 instruction marks both X0 and W0 (as well as W0_HI) as
166/// safe-to-dereference. It does not change the state of X0_X1, for example,
167/// as super-registers partially retain their old, unsafe values.
168/// * LDR X1, [X0] marks as unsafe both X1 itself and anything it overlaps
169/// with: W1, W1_HI, X0_X1 and so on.
170/// * RET (which is implicitly RET X30) is a protected return if and only if
171/// X30 is safe-to-dereference - the state computed for sub- and
172/// super-registers is not inspected.
173struct SrcState {
174 /// A BitVector containing the registers that are either authenticated
175 /// (assuming failed authentication is permitted to produce an invalid
176 /// address, provided it generates an error on memory access) or whose
177 /// value is known not to be attacker-controlled under Pointer Authentication
178 /// threat model. The registers in this set are either
179 /// * not clobbered since being authenticated, or
180 /// * trusted at function entry and were not clobbered yet, or
181 /// * contain a safely materialized address.
182 BitVector SafeToDerefRegs;
183 /// A BitVector containing the registers that are either authenticated
184 /// *successfully* or whose value is known not to be attacker-controlled
185 /// under Pointer Authentication threat model.
186 /// The registers in this set are either
187 /// * authenticated and then checked to be authenticated successfully
188 /// (and not clobbered since then), or
189 /// * trusted at function entry and were not clobbered yet, or
190 /// * contain a safely materialized address.
191 BitVector TrustedRegs;
192 /// A vector of sets, only used in the second data flow run.
193 /// Each element in the vector represents one of the registers for which we
194 /// track the set of last instructions that wrote to this register. For
195 /// pac-ret analysis, the expectation is that almost all return instructions
196 /// only use register `X30`, and therefore, this vector will probably have
197 /// length 1 in the second run.
198 std::vector<SmallPtrSet<const MCInst *, 4>> LastInstWritingReg;
199
200 /// Construct an empty state.
201 SrcState() {}
202
203 SrcState(unsigned NumRegs, unsigned NumRegsToTrack)
204 : SafeToDerefRegs(NumRegs), TrustedRegs(NumRegs),
205 LastInstWritingReg(NumRegsToTrack) {}
206
207 SrcState &merge(const SrcState &StateIn) {
208 if (StateIn.empty())
209 return *this;
210 if (empty())
211 return (*this = StateIn);
212
213 SafeToDerefRegs &= StateIn.SafeToDerefRegs;
214 TrustedRegs &= StateIn.TrustedRegs;
215 for (unsigned I = 0; I < LastInstWritingReg.size(); ++I)
216 for (const MCInst *J : StateIn.LastInstWritingReg[I])
217 LastInstWritingReg[I].insert(Ptr: J);
218 return *this;
219 }
220
221 /// Returns true if this object does not store state of any registers -
222 /// neither safe, nor unsafe ones.
223 bool empty() const { return SafeToDerefRegs.empty(); }
224
225 bool operator==(const SrcState &RHS) const {
226 return SafeToDerefRegs == RHS.SafeToDerefRegs &&
227 TrustedRegs == RHS.TrustedRegs &&
228 LastInstWritingReg == RHS.LastInstWritingReg;
229 }
230 bool operator!=(const SrcState &RHS) const { return !((*this) == RHS); }
231};
232
233static void
234printLastInsts(raw_ostream &OS,
235 ArrayRef<SmallPtrSet<const MCInst *, 4>> LastInstWritingReg) {
236 OS << "Insts: ";
237 for (unsigned I = 0; I < LastInstWritingReg.size(); ++I) {
238 auto &Set = LastInstWritingReg[I];
239 OS << "[" << I << "](";
240 for (const MCInst *MCInstP : Set)
241 OS << MCInstP << " ";
242 OS << ")";
243 }
244}
245
246raw_ostream &operator<<(raw_ostream &OS, const SrcState &S) {
247 OS << "src-state<";
248 if (S.empty()) {
249 OS << "empty";
250 } else {
251 OS << "SafeToDerefRegs: " << S.SafeToDerefRegs << ", ";
252 OS << "TrustedRegs: " << S.TrustedRegs << ", ";
253 printLastInsts(OS, LastInstWritingReg: S.LastInstWritingReg);
254 }
255 OS << ">";
256 return OS;
257}
258
259class SrcStatePrinter {
260public:
261 void print(raw_ostream &OS, const SrcState &State) const;
262 explicit SrcStatePrinter(const BinaryContext &BC) : BC(BC) {}
263
264private:
265 const BinaryContext &BC;
266};
267
268void SrcStatePrinter::print(raw_ostream &OS, const SrcState &S) const {
269 RegStatePrinter RegStatePrinter(BC);
270 OS << "src-state<";
271 if (S.empty()) {
272 assert(S.SafeToDerefRegs.empty());
273 assert(S.TrustedRegs.empty());
274 assert(S.LastInstWritingReg.empty());
275 OS << "empty";
276 } else {
277 OS << "SafeToDerefRegs: ";
278 RegStatePrinter.print(OS, State: S.SafeToDerefRegs);
279 OS << ", TrustedRegs: ";
280 RegStatePrinter.print(OS, State: S.TrustedRegs);
281 OS << ", ";
282 printLastInsts(OS, LastInstWritingReg: S.LastInstWritingReg);
283 }
284 OS << ">";
285}
286
287/// Computes which registers are safe to be used by control flow and signing
288/// instructions.
289///
290/// This is the base class for two implementations: a dataflow-based analysis
291/// which is intended to be used for most functions and a simplified CFG-unaware
292/// version for functions without reconstructed CFG.
293class SrcSafetyAnalysis {
294public:
295 SrcSafetyAnalysis(BinaryFunction &BF, ArrayRef<MCPhysReg> RegsToTrackInstsFor)
296 : BC(BF.getBinaryContext()), NumRegs(BC.MRI->getNumRegs()),
297 RegsToTrackInstsFor(RegsToTrackInstsFor) {}
298
299 virtual ~SrcSafetyAnalysis() {}
300
301 static std::shared_ptr<SrcSafetyAnalysis>
302 create(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId,
303 ArrayRef<MCPhysReg> RegsToTrackInstsFor);
304
305 virtual void run() = 0;
306 virtual const SrcState &getStateBefore(const MCInst &Inst) const = 0;
307
308protected:
309 BinaryContext &BC;
310 const unsigned NumRegs;
311 /// RegToTrackInstsFor is the set of registers for which the dataflow analysis
312 /// must compute which the last set of instructions writing to it are.
313 const TrackedRegisters RegsToTrackInstsFor;
314 /// Stores information about the detected instruction sequences emitted to
315 /// check an authenticated pointer. Specifically, if such sequence is detected
316 /// in a basic block, it maps the last instruction of that basic block to
317 /// (CheckedRegister, FirstInstOfTheSequence) pair, see the description of
318 /// MCPlusBuilder::getAuthCheckedReg(BB) method.
319 ///
320 /// As the detection of such sequences requires iterating over the adjacent
321 /// instructions, it should be done before calling computeNext(), which
322 /// operates on separate instructions.
323 DenseMap<const MCInst *, std::pair<MCPhysReg, const MCInst *>>
324 CheckerSequenceInfo;
325
326 SmallPtrSet<const MCInst *, 4> &lastWritingInsts(SrcState &S,
327 MCPhysReg Reg) const {
328 unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
329 return S.LastInstWritingReg[Index];
330 }
331 const SmallPtrSet<const MCInst *, 4> &lastWritingInsts(const SrcState &S,
332 MCPhysReg Reg) const {
333 unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
334 return S.LastInstWritingReg[Index];
335 }
336
337 SrcState createEntryState() {
338 SrcState S(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
339 for (MCPhysReg Reg : BC.MIB->getTrustedLiveInRegs())
340 S.TrustedRegs |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/true);
341 S.SafeToDerefRegs = S.TrustedRegs;
342 return S;
343 }
344
345 BitVector getClobberedRegs(const MCInst &Point) const {
346 BitVector Clobbered(NumRegs);
347 // Assume a call can clobber all registers, including callee-saved
348 // registers. There's a good chance that callee-saved registers will be
349 // saved on the stack at some point during execution of the callee.
350 // Therefore they should also be considered as potentially modified by an
351 // attacker/written to.
352 // Also, not all functions may respect the AAPCS ABI rules about
353 // caller/callee-saved registers.
354 if (BC.MIB->isCall(Inst: Point))
355 Clobbered.set();
356 else
357 BC.MIB->getClobberedRegs(Inst: Point, Regs&: Clobbered);
358 return Clobbered;
359 }
360
361 // Returns all registers that can be treated as if they are written by an
362 // authentication instruction.
363 SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
364 const SrcState &Cur) const {
365 SmallVector<MCPhysReg> Regs;
366
367 // A signed pointer can be authenticated, or
368 bool Dummy = false;
369 if (auto AutReg = BC.MIB->getWrittenAuthenticatedReg(Inst: Point, IsChecked&: Dummy))
370 Regs.push_back(Elt: *AutReg);
371
372 // ... a safe address can be materialized, or
373 if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Inst: Point))
374 Regs.push_back(Elt: *NewAddrReg);
375
376 // ... an address can be updated in a safe manner, producing the result
377 // which is as trusted as the input address.
378 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst: Point)) {
379 if (Cur.SafeToDerefRegs[DstAndSrc->second])
380 Regs.push_back(Elt: DstAndSrc->first);
381 }
382
383 return Regs;
384 }
385
386 // Returns all registers made trusted by this instruction.
387 SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point,
388 const SrcState &Cur) const {
389 SmallVector<MCPhysReg> Regs;
390
391 // An authenticated pointer can be checked, or
392 std::optional<MCPhysReg> CheckedReg =
393 BC.MIB->getAuthCheckedReg(Inst: Point, /*MayOverwrite=*/false);
394 if (CheckedReg && Cur.SafeToDerefRegs[*CheckedReg])
395 Regs.push_back(Elt: *CheckedReg);
396
397 // ... a pointer can be authenticated by an instruction that always checks
398 // the pointer, or
399 bool IsChecked = false;
400 std::optional<MCPhysReg> AutReg =
401 BC.MIB->getWrittenAuthenticatedReg(Inst: Point, IsChecked);
402 if (AutReg && IsChecked)
403 Regs.push_back(Elt: *AutReg);
404
405 if (CheckerSequenceInfo.contains(Val: &Point)) {
406 MCPhysReg CheckedReg;
407 const MCInst *FirstCheckerInst;
408 std::tie(args&: CheckedReg, args&: FirstCheckerInst) = CheckerSequenceInfo.at(Val: &Point);
409
410 // FirstCheckerInst should belong to the same basic block (see the
411 // assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
412 // deterministically processed a few steps before this instruction.
413 const SrcState &StateBeforeChecker = getStateBefore(Inst: *FirstCheckerInst);
414 if (StateBeforeChecker.SafeToDerefRegs[CheckedReg])
415 Regs.push_back(Elt: CheckedReg);
416 }
417
418 // ... a safe address can be materialized, or
419 if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Inst: Point))
420 Regs.push_back(Elt: *NewAddrReg);
421
422 // ... an address can be updated in a safe manner, producing the result
423 // which is as trusted as the input address.
424 if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst: Point)) {
425 if (Cur.TrustedRegs[DstAndSrc->second])
426 Regs.push_back(Elt: DstAndSrc->first);
427 }
428
429 return Regs;
430 }
431
432 SrcState computeNext(const MCInst &Point, const SrcState &Cur) {
433 SrcStatePrinter P(BC);
434 LLVM_DEBUG({
435 dbgs() << " SrcSafetyAnalysis::ComputeNext(";
436 BC.InstPrinter->printInst(&const_cast<MCInst &>(Point), 0, "", *BC.STI,
437 dbgs());
438 dbgs() << ", ";
439 P.print(dbgs(), Cur);
440 dbgs() << ")\n";
441 });
442
443 // If this instruction is reachable, a non-empty state will be propagated
444 // to it from the entry basic block sooner or later. Until then, it is both
445 // more efficient and easier to reason about to skip computeNext().
446 if (Cur.empty()) {
447 LLVM_DEBUG(
448 { dbgs() << "Skipping computeNext(Point, Cur) as Cur is empty.\n"; });
449 return SrcState();
450 }
451
452 // First, compute various properties of the instruction, taking the state
453 // before its execution into account, if necessary.
454
455 BitVector Clobbered = getClobberedRegs(Point);
456 SmallVector<MCPhysReg> NewSafeToDerefRegs =
457 getRegsMadeSafeToDeref(Point, Cur);
458 SmallVector<MCPhysReg> NewTrustedRegs = getRegsMadeTrusted(Point, Cur);
459
460 // Ideally, being trusted is a strictly stronger property than being
461 // safe-to-dereference. To simplify the computation of Next state, enforce
462 // this for NewSafeToDerefRegs and NewTrustedRegs. Additionally, this
463 // fixes the properly for "cumulative" register states in tricky cases
464 // like the following:
465 //
466 // ; LR is safe to dereference here
467 // mov x16, x30 ; start of the sequence, LR is s-t-d right before
468 // xpaclri ; clobbers LR, LR is not safe anymore
469 // cmp x30, x16
470 // b.eq 1f ; end of the sequence: LR is marked as trusted
471 // brk 0x1234
472 // 1:
473 // ; at this point LR would be marked as trusted,
474 // ; but not safe-to-dereference
475 //
476 for (auto TrustedReg : NewTrustedRegs) {
477 if (!is_contained(Range&: NewSafeToDerefRegs, Element: TrustedReg))
478 NewSafeToDerefRegs.push_back(Elt: TrustedReg);
479 }
480
481 // Then, compute the state after this instruction is executed.
482 SrcState Next = Cur;
483
484 Next.SafeToDerefRegs.reset(RHS: Clobbered);
485 Next.TrustedRegs.reset(RHS: Clobbered);
486 // Keep track of this instruction if it writes to any of the registers we
487 // need to track that for:
488 for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters())
489 if (Clobbered[Reg])
490 lastWritingInsts(S&: Next, Reg) = {&Point};
491
492 // After accounting for clobbered registers in general, override the state
493 // according to authentication and other *special cases* of clobbering.
494
495 // The sub-registers are also safe-to-dereference now, but not their
496 // super-registers (as they retain untrusted register units).
497 BitVector NewSafeSubregs(NumRegs);
498 for (MCPhysReg SafeReg : NewSafeToDerefRegs)
499 NewSafeSubregs |= BC.MIB->getAliases(Reg: SafeReg, /*OnlySmaller=*/true);
500 for (MCPhysReg Reg : NewSafeSubregs.set_bits()) {
501 Next.SafeToDerefRegs.set(Reg);
502 if (RegsToTrackInstsFor.isTracked(Reg))
503 lastWritingInsts(S&: Next, Reg).clear();
504 }
505
506 // Process new trusted registers.
507 for (MCPhysReg TrustedReg : NewTrustedRegs)
508 Next.TrustedRegs |= BC.MIB->getAliases(Reg: TrustedReg, /*OnlySmaller=*/true);
509
510 LLVM_DEBUG({
511 dbgs() << " .. result: (";
512 P.print(dbgs(), Next);
513 dbgs() << ")\n";
514 });
515
516 return Next;
517 }
518
519public:
520 std::vector<MCInstReference>
521 getLastClobberingInsts(const MCInst &Inst, BinaryFunction &BF,
522 MCPhysReg ClobberedReg) const {
523 const SrcState &S = getStateBefore(Inst);
524
525 std::vector<MCInstReference> Result;
526 for (const MCInst *Inst : lastWritingInsts(S, Reg: ClobberedReg)) {
527 MCInstReference Ref = MCInstReference::get(Inst, BF);
528 assert(Ref && "Expected Inst to be found");
529 Result.push_back(x: Ref);
530 }
531 return Result;
532 }
533};
534
535class DataflowSrcSafetyAnalysis
536 : public SrcSafetyAnalysis,
537 public DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState,
538 /*Backward=*/false, SrcStatePrinter> {
539 using DFParent = DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState, false,
540 SrcStatePrinter>;
541 friend DFParent;
542
543 using SrcSafetyAnalysis::BC;
544 using SrcSafetyAnalysis::computeNext;
545
546public:
547 DataflowSrcSafetyAnalysis(BinaryFunction &BF,
548 MCPlusBuilder::AllocatorIdTy AllocId,
549 ArrayRef<MCPhysReg> RegsToTrackInstsFor)
550 : SrcSafetyAnalysis(BF, RegsToTrackInstsFor), DFParent(BF, AllocId) {}
551
552 const SrcState &getStateBefore(const MCInst &Inst) const override {
553 return DFParent::getStateBefore(Point: Inst).get();
554 }
555
556 void run() override {
557 for (BinaryBasicBlock &BB : Func) {
558 if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) {
559 MCPhysReg CheckedReg = CheckerInfo->first;
560 MCInst &FirstInst = *CheckerInfo->second;
561 MCInst &LastInst = *BB.getLastNonPseudoInstr();
562 LLVM_DEBUG({
563 dbgs() << "Found pointer checking sequence in " << BB.getName()
564 << ":\n";
565 traceReg(BC, "Checked register", CheckedReg);
566 traceInst(BC, "First instruction", FirstInst);
567 traceInst(BC, "Last instruction", LastInst);
568 });
569 (void)CheckedReg;
570 (void)FirstInst;
571 assert(llvm::any_of(BB, [&](MCInst &I) { return &I == &FirstInst; }) &&
572 "Data-flow analysis expects the checker not to cross BBs");
573 CheckerSequenceInfo[&LastInst] = *CheckerInfo;
574 }
575 }
576 DFParent::run();
577 }
578
579protected:
580 void preflight() {}
581
582 SrcState getStartingStateAtBB(const BinaryBasicBlock &BB) {
583 if (BB.isEntryPoint())
584 return createEntryState();
585
586 return SrcState();
587 }
588
589 SrcState getStartingStateAtPoint(const MCInst &Point) { return SrcState(); }
590
591 void doConfluence(SrcState &StateOut, const SrcState &StateIn) {
592 SrcStatePrinter P(BC);
593 LLVM_DEBUG({
594 dbgs() << " DataflowSrcSafetyAnalysis::Confluence(\n";
595 dbgs() << " State 1: ";
596 P.print(dbgs(), StateOut);
597 dbgs() << "\n";
598 dbgs() << " State 2: ";
599 P.print(dbgs(), StateIn);
600 dbgs() << ")\n";
601 });
602
603 StateOut.merge(StateIn);
604
605 LLVM_DEBUG({
606 dbgs() << " merged state: ";
607 P.print(dbgs(), StateOut);
608 dbgs() << "\n";
609 });
610 }
611
612 StringRef getAnnotationName() const { return "DataflowSrcSafetyAnalysis"; }
613};
614
615// A simplified implementation of DataflowSrcSafetyAnalysis for functions
616// lacking CFG information.
617//
// Let's assume the instructions can only be executed linearly unless there is
619// a label to jump to - this should handle both directly jumping to a location
620// encoded as an immediate operand of a branch instruction, as well as saving a
621// branch destination somewhere and passing it to an indirect branch instruction
622// later, provided no arithmetic is performed on the destination address:
623//
624// ; good: the destination is directly encoded into the branch instruction
625// cbz x0, some_label
626//
627// ; good: the branch destination is first stored and then used as-is
628// adr x1, some_label
629// br x1
630//
631// ; bad: some clever arithmetic is performed manually
632// adr x1, some_label
633// add x1, x1, #4
634// br x1
635// ...
636// some_label:
637// ; pessimistically reset the state as we are unsure where we came from
638// ...
639// ret
640// JTI0:
641// .byte some_label - Ltmp0 ; computing offsets using labels may probably
642// work too, provided enough information is
643// retained by the assembler and linker
644//
645// Then, a function can be split into a number of disjoint contiguous sequences
646// of instructions without labels in between. These sequences can be processed
647// the same way basic blocks are processed by data-flow analysis, assuming
648// pessimistically that all registers are unsafe at the start of each sequence.
649class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
650 BinaryFunction &BF;
651 MCPlusBuilder::AllocatorIdTy AllocId;
652 unsigned StateAnnotationIndex;
653
654 void cleanStateAnnotations() {
655 for (auto &I : BF.instrs())
656 BC.MIB->removeAnnotation(Inst&: I.second, Index: StateAnnotationIndex);
657 }
658
659 /// Creates a state with all registers marked unsafe (not to be confused
660 /// with empty state).
661 SrcState createUnsafeState() const {
662 return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
663 }
664
665public:
666 CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
667 MCPlusBuilder::AllocatorIdTy AllocId,
668 ArrayRef<MCPhysReg> RegsToTrackInstsFor)
669 : SrcSafetyAnalysis(BF, RegsToTrackInstsFor), BF(BF), AllocId(AllocId) {
670 StateAnnotationIndex =
671 BC.MIB->getOrCreateAnnotationIndex(Name: "CFGUnawareSrcSafetyAnalysis");
672 }
673
674 void run() override {
675 SrcState S = createEntryState();
676 for (auto &I : BF.instrs()) {
677 MCInst &Inst = I.second;
678
679 // If there is a label before this instruction, it is possible that it
680 // can be jumped-to, thus conservatively resetting S. As an exception,
681 // let's ignore any labels at the beginning of the function, as at least
682 // one label is expected there.
683 if (BF.hasLabelAt(Offset: I.first) && &Inst != &BF.instrs().begin()->second) {
684 LLVM_DEBUG({
685 traceInst(BC, "Due to label, resetting the state before", Inst);
686 });
687 S = createUnsafeState();
688 }
689
690 // Check if we need to remove an old annotation (this is the case if
691 // this is the second, detailed, run of the analysis).
692 if (BC.MIB->hasAnnotation(Inst, Index: StateAnnotationIndex))
693 BC.MIB->removeAnnotation(Inst, Index: StateAnnotationIndex);
694 // Attach the state *before* this instruction executes.
695 BC.MIB->addAnnotation(Inst, Index: StateAnnotationIndex, Val: S, AllocatorId: AllocId);
696
697 // Compute the state after this instruction executes.
698 S = computeNext(Point: Inst, Cur: S);
699 }
700 }
701
702 const SrcState &getStateBefore(const MCInst &Inst) const override {
703 return BC.MIB->getAnnotationAs<SrcState>(Inst, Index: StateAnnotationIndex);
704 }
705
706 ~CFGUnawareSrcSafetyAnalysis() { cleanStateAnnotations(); }
707};
708
709std::shared_ptr<SrcSafetyAnalysis>
710SrcSafetyAnalysis::create(BinaryFunction &BF,
711 MCPlusBuilder::AllocatorIdTy AllocId,
712 ArrayRef<MCPhysReg> RegsToTrackInstsFor) {
713 if (BF.hasCFG())
714 return std::make_shared<DataflowSrcSafetyAnalysis>(args&: BF, args&: AllocId,
715 args&: RegsToTrackInstsFor);
716 return std::make_shared<CFGUnawareSrcSafetyAnalysis>(args&: BF, args&: AllocId,
717 args&: RegsToTrackInstsFor);
718}
719
720// This function could return PartialReport<T>, but currently T is always
721// MCPhysReg, even though it is an implementation detail.
722static PartialReport<MCPhysReg> make_generic_report(MCInstReference Location,
723 StringRef Text) {
724 auto Report = std::make_shared<GenericDiagnostic>(args&: Location, args&: Text);
725 return PartialReport<MCPhysReg>(Report, std::nullopt);
726}
727
728template <typename T>
729static PartialReport<T> make_gadget_report(const GadgetKind &Kind,
730 MCInstReference Location,
731 T RequestedDetails) {
732 auto Report = std::make_shared<GadgetDiagnostic>(args: Kind, args&: Location);
733 return PartialReport<T>(Report, RequestedDetails);
734}
735
736static std::optional<PartialReport<MCPhysReg>>
737shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
738 const SrcState &S) {
739 static const GadgetKind RetKind("non-protected ret found");
740 if (!BC.MIB->isReturn(Inst))
741 return std::nullopt;
742
743 bool IsAuthenticated = false;
744 std::optional<MCPhysReg> RetReg =
745 BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticatedInternally&: IsAuthenticated);
746 if (!RetReg) {
747 return make_generic_report(
748 Location: Inst, Text: "Warning: pac-ret analysis could not analyze this return "
749 "instruction");
750 }
751 if (IsAuthenticated)
752 return std::nullopt;
753
754 LLVM_DEBUG({
755 traceInst(BC, "Found RET inst", Inst);
756 traceReg(BC, "RetReg", *RetReg);
757 traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs);
758 });
759
760 if (S.SafeToDerefRegs[*RetReg])
761 return std::nullopt;
762
763 return make_gadget_report(Kind: RetKind, Location: Inst, RequestedDetails: *RetReg);
764}
765
766static std::optional<PartialReport<MCPhysReg>>
767shouldReportCallGadget(const BinaryContext &BC, const MCInstReference &Inst,
768 const SrcState &S) {
769 static const GadgetKind CallKind("non-protected call found");
770 if (!BC.MIB->isIndirectCall(Inst) && !BC.MIB->isIndirectBranch(Inst))
771 return std::nullopt;
772
773 bool IsAuthenticated = false;
774 MCPhysReg DestReg =
775 BC.MIB->getRegUsedAsIndirectBranchDest(Inst, IsAuthenticatedInternally&: IsAuthenticated);
776 if (IsAuthenticated)
777 return std::nullopt;
778
779 assert(DestReg != BC.MIB->getNoRegister() && "Valid register expected");
780 LLVM_DEBUG({
781 traceInst(BC, "Found call inst", Inst);
782 traceReg(BC, "Call destination reg", DestReg);
783 traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs);
784 });
785 if (S.SafeToDerefRegs[DestReg])
786 return std::nullopt;
787
788 return make_gadget_report(Kind: CallKind, Location: Inst, RequestedDetails: DestReg);
789}
790
791static std::optional<PartialReport<MCPhysReg>>
792shouldReportSigningOracle(const BinaryContext &BC, const MCInstReference &Inst,
793 const SrcState &S) {
794 static const GadgetKind SigningOracleKind("signing oracle found");
795
796 std::optional<MCPhysReg> SignedReg = BC.MIB->getSignedReg(Inst);
797 if (!SignedReg)
798 return std::nullopt;
799
800 LLVM_DEBUG({
801 traceInst(BC, "Found sign inst", Inst);
802 traceReg(BC, "Signed reg", *SignedReg);
803 traceRegMask(BC, "TrustedRegs", S.TrustedRegs);
804 });
805 if (S.TrustedRegs[*SignedReg])
806 return std::nullopt;
807
808 return make_gadget_report(Kind: SigningOracleKind, Location: Inst, RequestedDetails: *SignedReg);
809}
810
811template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
812 if (BF.hasCFG()) {
813 for (BinaryBasicBlock &BB : BF)
814 for (int64_t I = 0, E = BB.size(); I < E; ++I)
815 Fn(MCInstInBBReference(&BB, I));
816 } else {
817 for (auto I : BF.instrs())
818 Fn(MCInstInBFReference(&BF, I.first));
819 }
820}
821
822static SmallVector<MCPhysReg>
823collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) {
824 SmallSet<MCPhysReg, 4> RegsToTrack;
825 for (auto Report : Reports)
826 if (Report.RequestedDetails)
827 RegsToTrack.insert(V: *Report.RequestedDetails);
828
829 return SmallVector<MCPhysReg>(RegsToTrack.begin(), RegsToTrack.end());
830}
831
832void FunctionAnalysisContext::findUnsafeUses(
833 SmallVector<PartialReport<MCPhysReg>> &Reports) {
834 auto Analysis = SrcSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: {});
835 LLVM_DEBUG({ dbgs() << "Running src register safety analysis...\n"; });
836 Analysis->run();
837 LLVM_DEBUG({
838 dbgs() << "After src register safety analysis:\n";
839 BF.dump();
840 });
841
842 iterateOverInstrs(BF, Fn: [&](MCInstReference Inst) {
843 const SrcState &S = Analysis->getStateBefore(Inst);
844
845 // If non-empty state was never propagated from the entry basic block
846 // to Inst, assume it to be unreachable and report a warning.
847 if (S.empty()) {
848 Reports.push_back(
849 Elt: make_generic_report(Location: Inst, Text: "Warning: unreachable instruction found"));
850 return;
851 }
852
853 if (auto Report = shouldReportReturnGadget(BC, Inst, S))
854 Reports.push_back(Elt: *Report);
855
856 if (PacRetGadgetsOnly)
857 return;
858
859 if (auto Report = shouldReportCallGadget(BC, Inst, S))
860 Reports.push_back(Elt: *Report);
861 if (auto Report = shouldReportSigningOracle(BC, Inst, S))
862 Reports.push_back(Elt: *Report);
863 });
864}
865
866void FunctionAnalysisContext::augmentUnsafeUseReports(
867 ArrayRef<PartialReport<MCPhysReg>> Reports) {
868 SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports);
869 // Re-compute the analysis with register tracking.
870 auto Analysis = SrcSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: RegsToTrack);
871 LLVM_DEBUG(
872 { dbgs() << "\nRunning detailed src register safety analysis...\n"; });
873 Analysis->run();
874 LLVM_DEBUG({
875 dbgs() << "After detailed src register safety analysis:\n";
876 BF.dump();
877 });
878
879 // Augment gadget reports.
880 for (auto &Report : Reports) {
881 MCInstReference Location = Report.Issue->Location;
882 LLVM_DEBUG({ traceInst(BC, "Attaching clobbering info to", Location); });
883 assert(Report.RequestedDetails &&
884 "Should be removed by handleSimpleReports");
885 auto DetailedInfo =
886 std::make_shared<ClobberingInfo>(args: Analysis->getLastClobberingInsts(
887 Inst: Location, BF, ClobberedReg: *Report.RequestedDetails));
888 Result.Diagnostics.emplace_back(args: Report.Issue, args&: DetailedInfo);
889 }
890}
891
892void FunctionAnalysisContext::handleSimpleReports(
893 SmallVector<PartialReport<MCPhysReg>> &Reports) {
894 // Before re-running the detailed analysis, process the reports which do not
895 // need any additional details to be attached.
896 for (auto &Report : Reports) {
897 if (!Report.RequestedDetails)
898 Result.Diagnostics.emplace_back(args&: Report.Issue, args: nullptr);
899 }
900 llvm::erase_if(C&: Reports, P: [](const auto &R) { return !R.RequestedDetails; });
901}
902
903void FunctionAnalysisContext::run() {
904 LLVM_DEBUG({
905 dbgs() << "Analyzing function " << BF.getPrintName()
906 << ", AllocatorId = " << AllocatorId << "\n";
907 BF.dump();
908 });
909
910 SmallVector<PartialReport<MCPhysReg>> UnsafeUses;
911 findUnsafeUses(Reports&: UnsafeUses);
912 handleSimpleReports(Reports&: UnsafeUses);
913 if (!UnsafeUses.empty())
914 augmentUnsafeUseReports(Reports: UnsafeUses);
915}
916
917void Analysis::runOnFunction(BinaryFunction &BF,
918 MCPlusBuilder::AllocatorIdTy AllocatorId) {
919 FunctionAnalysisContext FA(BF, AllocatorId, PacRetGadgetsOnly);
920 FA.run();
921
922 const FunctionAnalysisResult &FAR = FA.getResult();
923 if (FAR.Diagnostics.empty())
924 return;
925
926 // `runOnFunction` is typically getting called from multiple threads in
927 // parallel. Therefore, use a lock to avoid data races when storing the
928 // result of the analysis in the `AnalysisResults` map.
929 {
930 std::lock_guard<std::mutex> Lock(AnalysisResultsMutex);
931 AnalysisResults[&BF] = FAR;
932 }
933}
934
935static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
936 size_t StartIndex = 0, size_t EndIndex = -1) {
937 if (EndIndex == (size_t)-1)
938 EndIndex = BB->size() - 1;
939 const BinaryFunction *BF = BB->getFunction();
940 for (unsigned I = StartIndex; I <= EndIndex; ++I) {
941 // FIXME: this assumes all instructions are 4 bytes in size. This is true
942 // for AArch64, but it might be good to extract this function so it can be
943 // used elsewhere and for other targets too.
944 uint64_t Address = BB->getOffset() + BF->getAddress() + 4 * I;
945 const MCInst &Inst = BB->getInstructionAtIndex(Index: I);
946 if (BC.MIB->isCFI(Inst))
947 continue;
948 BC.printInstruction(OS&: outs(), Instruction: Inst, Offset: Address, Function: BF);
949 }
950}
951
952static void reportFoundGadgetInSingleBBSingleRelatedInst(
953 raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst,
954 const MCInstReference Location) {
955 BinaryBasicBlock *BB = Location.getBasicBlock();
956 assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent);
957 assert(Location.ParentKind == MCInstReference::BasicBlockParent);
958 MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef;
959 if (BB == RelatedInstBB.BB) {
960 OS << " This happens in the following basic block:\n";
961 printBB(BC, BB);
962 }
963}
964
965void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
966 StringRef IssueKind) const {
967 BinaryFunction *BF = Location.getFunction();
968 BinaryBasicBlock *BB = Location.getBasicBlock();
969
970 OS << "\nGS-PAUTH: " << IssueKind;
971 OS << " in function " << BF->getPrintName();
972 if (BB)
973 OS << ", basic block " << BB->getName();
974 OS << ", at address " << llvm::format(Fmt: "%x", Vals: Location.getAddress()) << "\n";
975 OS << " The instruction is ";
976 BC.printInstruction(OS, Instruction: Location, Offset: Location.getAddress(), Function: BF);
977}
978
979void GadgetDiagnostic::generateReport(raw_ostream &OS,
980 const BinaryContext &BC) const {
981 printBasicInfo(OS, BC, IssueKind: Kind.getDescription());
982}
983
984static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
985 ArrayRef<MCInstReference> RelatedInstrs) {
986 const BinaryFunction &BF = *Location.getFunction();
987 const BinaryContext &BC = BF.getBinaryContext();
988
989 // Sort by address to ensure output is deterministic.
990 SmallVector<MCInstReference> RI(RelatedInstrs);
991 llvm::sort(C&: RI, Comp: [](const MCInstReference &A, const MCInstReference &B) {
992 return A.getAddress() < B.getAddress();
993 });
994 for (unsigned I = 0; I < RI.size(); ++I) {
995 MCInstReference InstRef = RI[I];
996 OS << " " << (I + 1) << ". ";
997 BC.printInstruction(OS, Instruction: InstRef, Offset: InstRef.getAddress(), Function: &BF);
998 };
999 if (RelatedInstrs.size() == 1) {
1000 const MCInstReference RelatedInst = RelatedInstrs[0];
1001 // Printing the details for the MCInstReference::FunctionParent case
1002 // is not implemented not to overcomplicate the code, as most functions
1003 // are expected to have CFG information.
1004 if (RelatedInst.ParentKind == MCInstReference::BasicBlockParent)
1005 reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst,
1006 Location);
1007 }
1008}
1009
1010void ClobberingInfo::print(raw_ostream &OS,
1011 const MCInstReference Location) const {
1012 OS << " The " << ClobberingInstrs.size()
1013 << " instructions that write to the affected registers after any "
1014 "authentication are:\n";
1015 printRelatedInstrs(OS, Location, RelatedInstrs: ClobberingInstrs);
1016}
1017
1018void GenericDiagnostic::generateReport(raw_ostream &OS,
1019 const BinaryContext &BC) const {
1020 printBasicInfo(OS, BC, IssueKind: Text);
1021}
1022
1023Error Analysis::runOnFunctions(BinaryContext &BC) {
1024 ParallelUtilities::WorkFuncWithAllocTy WorkFun =
1025 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) {
1026 runOnFunction(BF, AllocatorId);
1027 };
1028
1029 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
1030 return false;
1031 };
1032
1033 ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
1034 BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFunction: WorkFun,
1035 SkipPredicate: SkipFunc, LogName: "PAuthGadgetScanner");
1036
1037 for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
1038 if (!AnalysisResults.count(x: BF))
1039 continue;
1040 for (const FinalReport &R : AnalysisResults[BF].Diagnostics) {
1041 R.Issue->generateReport(OS&: outs(), BC);
1042 if (R.Details)
1043 R.Details->print(OS&: outs(), Location: R.Issue->Location);
1044 }
1045 }
1046 return Error::success();
1047}
1048
1049} // namespace PAuthGadgetScanner
1050} // namespace bolt
1051} // namespace llvm
1052

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of bolt/lib/Passes/PAuthGadgetScanner.cpp