PAuthGadgetScanner.cpp source code [bolt/lib/Passes/PAuthGadgetScanner.cpp]

1	//===- bolt/Passes/PAuthGadgetScanner.cpp ---------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements a pass that looks for any AArch64 return instructions
10	// that may not be protected by PAuth authentication instructions when needed.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "bolt/Passes/PAuthGadgetScanner.h"
15	#include "bolt/Core/ParallelUtilities.h"
16	#include "bolt/Passes/DataflowAnalysis.h"
17	#include "llvm/ADT/STLExtras.h"
18	#include "llvm/ADT/SmallSet.h"
19	#include "llvm/MC/MCInst.h"
20	#include "llvm/Support/Format.h"
21	#include <memory>
22
23	#define DEBUG_TYPE "bolt-pauth-scanner"
24
25	namespace llvm {
26	namespace bolt {
27
28	raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) {
29	OS << "MCInstBBRef<";
30	if (Ref.BB == nullptr)
31	OS << "BB:(null)";
32	else
33	OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex;
34	OS << ">";
35	return OS;
36	}
37
38	raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) {
39	OS << "MCInstBFRef<";
40	if (Ref.BF == nullptr)
41	OS << "BF:(null)";
42	else
43	OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset();
44	OS << ">";
45	return OS;
46	}
47
48	raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
49	switch (Ref.ParentKind) {
50	case MCInstReference::BasicBlockParent:
51	OS << Ref.U.BBRef;
52	return OS;
53	case MCInstReference::FunctionParent:
54	OS << Ref.U.BFRef;
55	return OS;
56	}
57	llvm_unreachable("");
58	}
59
60	namespace PAuthGadgetScanner {
61
62	[[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
63	const MCInst &MI) {
64	dbgs() << " " << Label << ": ";
65	BC.printInstruction(OS&: dbgs(), Instruction: MI);
66	}
67
68	[[maybe_unused]] static void traceReg(const BinaryContext &BC, StringRef Label,
69	MCPhysReg Reg) {
70	dbgs() << " " << Label << ": ";
71	if (Reg == BC.MIB ->getNoRegister())
72	dbgs() << "(none)";
73	else
74	dbgs() << BC.MRI ->getName(RegNo: Reg);
75	dbgs() << "\n";
76	}
77
78	[[maybe_unused]] static void traceRegMask(const BinaryContext &BC,
79	StringRef Label, BitVector Mask) {
80	dbgs() << " " << Label << ": ";
81	RegStatePrinter (BC).print(OS&: dbgs(), State: Mask);
82	dbgs() << "\n";
83	}
84
85	// This class represents mapping from a set of arbitrary physical registers to
86	// consecutive array indexes.
87	class TrackedRegisters {
88	static constexpr uint16_t NoIndex = -`1`;
89	const std::vector<MCPhysReg> Registers;
90	std::vector<uint16_t> RegToIndexMapping;
91
92	static size_t getMappingSize(ArrayRef<MCPhysReg> RegsToTrack) {
93	if (RegsToTrack.empty())
94	return `0`;
95	return `1` + *llvm::max_element(Range&: RegsToTrack);
96	}
97
98	public:
99	TrackedRegisters(ArrayRef<MCPhysReg> RegsToTrack)
100	: Registers(RegsToTrack),
101	RegToIndexMapping (getMappingSize(RegsToTrack), NoIndex) {
102	for (unsigned I = `0`; I < RegsToTrack.size(); ++I)
103	RegToIndexMapping [RegsToTrack [I]] = I;
104	}
105
106	ArrayRef<MCPhysReg> getRegisters() const { return Registers; }
107
108	size_t getNumTrackedRegisters() const { return Registers.size(); }
109
110	bool empty() const { return Registers.empty(); }
111
112	bool isTracked(MCPhysReg Reg) const {
113	bool IsTracked = (unsigned)Reg < RegToIndexMapping.size() &&
114	RegToIndexMapping [Reg] != NoIndex;
115	assert(IsTracked == llvm::is_contained(Registers, Reg));
116	return IsTracked;
117	}
118
119	unsigned getIndex(MCPhysReg Reg) const {
120	assert(isTracked(Reg) && "Register is not tracked");
121	return RegToIndexMapping [Reg];
122	}
123	};
124
125	// The security property that is checked is:
126	// When a register is used as the address to jump to in a return instruction,
127	// that register must be safe-to-dereference. It must either
128	// (a) be safe-to-dereference at function entry and never be changed within this
129	// function, i.e. have the same value as when the function started, or
130	// (b) the last write to the register must be by an authentication instruction.
131
132	// This property is checked by using dataflow analysis to keep track of which
133	// registers have been written (def-ed), since last authenticated. For pac-ret,
134	// any return instruction using a register which is not safe-to-dereference is
135	// a gadget to be reported. For PAuthABI, probably at least any indirect control
136	// flow using such a register should be reported.
137
138	// Furthermore, when producing a diagnostic for a found non-pac-ret protected
139	// return, the analysis also lists the last instructions that wrote to the
140	// register used in the return instruction.
141	// The total set of registers used in return instructions in a given function is
142	// small. It almost always is just `X30`.
143	// In order to reduce the memory consumption of storing this additional state
144	// during the dataflow analysis, this is computed by running the dataflow
145	// analysis twice:
146	// 1. In the first run, the dataflow analysis only keeps track of the security
147	// property: i.e. which registers have been overwritten since the last
148	// time they've been authenticated.
149	// 2. If the first run finds any return instructions using a register last
150	// written by a non-authenticating instruction, the dataflow analysis will
151	// be run a second time. The first run will return which registers are used
152	// in the gadgets to be reported. This information is used in the second run
153	// to also track which instructions last wrote to those registers.
154
155	/// A state representing which registers are safe to use by an instruction
156	/// at a given program point.
157	///
158	/// To simplify reasoning, let's stick with the following approach:
159	/// when state is updated by the data-flow analysis, the sub-, super- and*
160	/// overlapping registers are marked as needed
161	/// when the particular instruction is checked if it represents a gadget,*
162	/// the specific bit of BitVector should be usable to answer this.
163	///
164	/// For example, on AArch64:
165	/// An AUTIZA X0 instruction marks both X0 and W0 (as well as W0_HI) as*
166	/// safe-to-dereference. It does not change the state of X0_X1, for example,
167	/// as super-registers partially retain their old, unsafe values.
168	/// LDR X1, [X0] marks as unsafe both X1 itself and anything it overlaps*
169	/// with: W1, W1_HI, X0_X1 and so on.
170	/// RET (which is implicitly RET X30) is a protected return if and only if*
171	/// X30 is safe-to-dereference - the state computed for sub- and
172	/// super-registers is not inspected.
173	struct SrcState {
174	/// A BitVector containing the registers that are either authenticated
175	/// (assuming failed authentication is permitted to produce an invalid
176	/// address, provided it generates an error on memory access) or whose
177	/// value is known not to be attacker-controlled under Pointer Authentication
178	/// threat model. The registers in this set are either
179	/// not clobbered since being authenticated, or*
180	/// trusted at function entry and were not clobbered yet, or*
181	/// contain a safely materialized address.*
182	BitVector SafeToDerefRegs;
183	/// A BitVector containing the registers that are either authenticated
184	/// successfully* or whose value is known not to be attacker-controlled*
185	/// under Pointer Authentication threat model.
186	/// The registers in this set are either
187	/// authenticated and then checked to be authenticated successfully*
188	/// (and not clobbered since then), or
189	/// trusted at function entry and were not clobbered yet, or*
190	/// contain a safely materialized address.*
191	BitVector TrustedRegs;
192	/// A vector of sets, only used in the second data flow run.
193	/// Each element in the vector represents one of the registers for which we
194	/// track the set of last instructions that wrote to this register. For
195	/// pac-ret analysis, the expectation is that almost all return instructions
196	/// only use register `X30`, and therefore, this vector will probably have
197	/// length 1 in the second run.
198	std::vector<SmallPtrSet<const MCInst *, `4`>> LastInstWritingReg;
199
200	/// Construct an empty state.
201	SrcState() {}
202
203	SrcState(unsigned NumRegs, unsigned NumRegsToTrack)
204	: SafeToDerefRegs (NumRegs), TrustedRegs (NumRegs),
205	LastInstWritingReg (NumRegsToTrack) {}
206
207	SrcState &merge(const SrcState &StateIn) {
208	if (StateIn.empty())
209	return *this;
210	if (empty())
211	return (*this = StateIn);
212
213	SafeToDerefRegs &= StateIn.SafeToDerefRegs;
214	TrustedRegs &= StateIn.TrustedRegs;
215	for (unsigned I = `0`; I < LastInstWritingReg.size(); ++I)
216	for (const MCInst *J : StateIn.LastInstWritingReg [I])
217	LastInstWritingReg [I].insert(Ptr: J);
218	return *this;
219	}
220
221	/// Returns true if this object does not store state of any registers -
222	/// neither safe, nor unsafe ones.
223	bool empty() const { return SafeToDerefRegs.empty(); }
224
225	bool operator==(const SrcState &RHS) const {
226	return SafeToDerefRegs == RHS.SafeToDerefRegs &&
227	TrustedRegs == RHS.TrustedRegs &&
228	LastInstWritingReg == RHS.LastInstWritingReg;
229	}
230	bool operator!=(const SrcState &RHS) const { return !((*this) == RHS); }
231	};
232
233	static void
234	printLastInsts(raw_ostream &OS,
235	ArrayRef<SmallPtrSet<const MCInst *, `4`>> LastInstWritingReg) {
236	OS << "Insts: ";
237	for (unsigned I = `0`; I < LastInstWritingReg.size(); ++I) {
238	auto &Set = LastInstWritingReg [I];
239	OS << "[" << I << "](";
240	for (const MCInst *MCInstP : Set)
241	OS << MCInstP << " ";
242	OS << ")";
243	}
244	}
245
246	raw_ostream &operator<<(raw_ostream &OS, const SrcState &S) {
247	OS << "src-state<";
248	if (S.empty()) {
249	OS << "empty";
250	} else {
251	OS << "SafeToDerefRegs: " << S.SafeToDerefRegs << ", ";
252	OS << "TrustedRegs: " << S.TrustedRegs << ", ";
253	printLastInsts(OS, LastInstWritingReg: S.LastInstWritingReg);
254	}
255	OS << ">";
256	return OS;
257	}
258
259	class SrcStatePrinter {
260	public:
261	void print(raw_ostream &OS, const SrcState &State) const;
262	explicit SrcStatePrinter(const BinaryContext &BC) : BC(BC) {}
263
264	private:
265	const BinaryContext &BC;
266	};
267
268	void SrcStatePrinter::print(raw_ostream &OS, const SrcState &S) const {
269	RegStatePrinter RegStatePrinter(BC);
270	OS << "src-state<";
271	if (S.empty()) {
272	assert(S.SafeToDerefRegs.empty());
273	assert(S.TrustedRegs.empty());
274	assert(S.LastInstWritingReg.empty());
275	OS << "empty";
276	} else {
277	OS << "SafeToDerefRegs: ";
278	RegStatePrinter.print(OS, State: S.SafeToDerefRegs);
279	OS << ", TrustedRegs: ";
280	RegStatePrinter.print(OS, State: S.TrustedRegs);
281	OS << ", ";
282	printLastInsts(OS, LastInstWritingReg: S.LastInstWritingReg);
283	}
284	OS << ">";
285	}
286
287	/// Computes which registers are safe to be used by control flow and signing
288	/// instructions.
289	///
290	/// This is the base class for two implementations: a dataflow-based analysis
291	/// which is intended to be used for most functions and a simplified CFG-unaware
292	/// version for functions without reconstructed CFG.
293	class SrcSafetyAnalysis {
294	public:
295	SrcSafetyAnalysis(BinaryFunction &BF, ArrayRef<MCPhysReg> RegsToTrackInstsFor)
296	: BC(BF.getBinaryContext()), NumRegs(BC.MRI ->getNumRegs()),
297	RegsToTrackInstsFor (RegsToTrackInstsFor) {}
298
299	virtual ~SrcSafetyAnalysis() {}
300
301	static std::shared_ptr<SrcSafetyAnalysis>
302	create(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId,
303	ArrayRef<MCPhysReg> RegsToTrackInstsFor);
304
305	virtual void run() = `0`;
306	virtual const SrcState &getStateBefore(const MCInst &Inst) const = `0`;
307
308	protected:
309	BinaryContext &BC;
310	const unsigned NumRegs;
311	/// RegToTrackInstsFor is the set of registers for which the dataflow analysis
312	/// must compute which the last set of instructions writing to it are.
313	const TrackedRegisters RegsToTrackInstsFor;
314	/// Stores information about the detected instruction sequences emitted to
315	/// check an authenticated pointer. Specifically, if such sequence is detected
316	/// in a basic block, it maps the last instruction of that basic block to
317	/// (CheckedRegister, FirstInstOfTheSequence) pair, see the description of
318	/// MCPlusBuilder::getAuthCheckedReg(BB) method.
319	///
320	/// As the detection of such sequences requires iterating over the adjacent
321	/// instructions, it should be done before calling computeNext(), which
322	/// operates on separate instructions.
323	DenseMap<const MCInst , std::pair<MCPhysReg, const* MCInst *>>
324	CheckerSequenceInfo;
325
326	SmallPtrSet<const MCInst *, `4`> &lastWritingInsts(SrcState &S,
327	MCPhysReg Reg) const {
328	unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
329	return S.LastInstWritingReg [Index];
330	}
331	const SmallPtrSet<const MCInst , `4`> &lastWritingInsts(const* SrcState &S,
332	MCPhysReg Reg) const {
333	unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
334	return S.LastInstWritingReg [Index];
335	}
336
337	SrcState createEntryState() {
338	SrcState S(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
339	for (MCPhysReg Reg : BC.MIB ->getTrustedLiveInRegs())
340	S.TrustedRegs \|= BC.MIB ->getAliases(Reg, /OnlySmaller=/true);
341	S.SafeToDerefRegs = S.TrustedRegs;
342	return S;
343	}
344
345	BitVector getClobberedRegs(const MCInst &Point) const {
346	BitVector Clobbered(NumRegs);
347	// Assume a call can clobber all registers, including callee-saved
348	// registers. There's a good chance that callee-saved registers will be
349	// saved on the stack at some point during execution of the callee.
350	// Therefore they should also be considered as potentially modified by an
351	// attacker/written to.
352	// Also, not all functions may respect the AAPCS ABI rules about
353	// caller/callee-saved registers.
354	if (BC.MIB ->isCall(Inst: Point))
355	Clobbered.set();
356	else
357	BC.MIB ->getClobberedRegs(Inst: Point, Regs&: Clobbered);
358	return Clobbered;
359	}
360
361	// Returns all registers that can be treated as if they are written by an
362	// authentication instruction.
363	SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
364	const SrcState &Cur) const {
365	SmallVector<MCPhysReg> Regs;
366
367	// A signed pointer can be authenticated, or
368	bool Dummy = false;
369	if (auto AutReg = BC.MIB ->getWrittenAuthenticatedReg(Inst: Point, IsChecked&: Dummy))
370	Regs.push_back(Elt: *AutReg);
371
372	// ... a safe address can be materialized, or
373	if (auto NewAddrReg = BC.MIB ->getMaterializedAddressRegForPtrAuth(Inst: Point))
374	Regs.push_back(Elt: *NewAddrReg);
375
376	// ... an address can be updated in a safe manner, producing the result
377	// which is as trusted as the input address.
378	if (auto DstAndSrc = BC.MIB ->analyzeAddressArithmeticsForPtrAuth(Inst: Point)) {
379	if (Cur.SafeToDerefRegs [DstAndSrc ->second])
380	Regs.push_back(Elt: DstAndSrc ->first);
381	}
382
383	return Regs;
384	}
385
386	// Returns all registers made trusted by this instruction.
387	SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point,
388	const SrcState &Cur) const {
389	SmallVector<MCPhysReg> Regs;
390
391	// An authenticated pointer can be checked, or
392	std::optional<MCPhysReg> CheckedReg =
393	BC.MIB ->getAuthCheckedReg(Inst: Point, /MayOverwrite=/false);
394	if (CheckedReg && Cur.SafeToDerefRegs [*CheckedReg])
395	Regs.push_back(Elt: *CheckedReg);
396
397	// ... a pointer can be authenticated by an instruction that always checks
398	// the pointer, or
399	bool IsChecked = false;
400	std::optional<MCPhysReg> AutReg =
401	BC.MIB ->getWrittenAuthenticatedReg(Inst: Point, IsChecked);
402	if (AutReg && IsChecked)
403	Regs.push_back(Elt: *AutReg);
404
405	if (CheckerSequenceInfo.contains(Val: &Point)) {
406	MCPhysReg CheckedReg;
407	const MCInst *FirstCheckerInst;
408	std::tie(args&: CheckedReg, args&: FirstCheckerInst) = CheckerSequenceInfo.at(Val: &Point);
409
410	// FirstCheckerInst should belong to the same basic block (see the
411	// assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
412	// deterministically processed a few steps before this instruction.
413	const SrcState &StateBeforeChecker = getStateBefore(Inst: *FirstCheckerInst);
414	if (StateBeforeChecker.SafeToDerefRegs [CheckedReg])
415	Regs.push_back(Elt: CheckedReg);
416	}
417
418	// ... a safe address can be materialized, or
419	if (auto NewAddrReg = BC.MIB ->getMaterializedAddressRegForPtrAuth(Inst: Point))
420	Regs.push_back(Elt: *NewAddrReg);
421
422	// ... an address can be updated in a safe manner, producing the result
423	// which is as trusted as the input address.
424	if (auto DstAndSrc = BC.MIB ->analyzeAddressArithmeticsForPtrAuth(Inst: Point)) {
425	if (Cur.TrustedRegs [DstAndSrc ->second])
426	Regs.push_back(Elt: DstAndSrc ->first);
427	}
428
429	return Regs;
430	}
431
432	SrcState computeNext(const MCInst &Point, const SrcState &Cur) {
433	SrcStatePrinter P(BC);
434	LLVM_DEBUG({
435	dbgs() << " SrcSafetyAnalysis::ComputeNext(";
436	BC.InstPrinter ->printInst(&const_cast<MCInst &>(Point), `0`, "", *BC.STI,
437	dbgs());
438	dbgs() << ", ";
439	P.print(dbgs(), Cur);
440	dbgs() << ")\n";
441	});
442
443	// If this instruction is reachable, a non-empty state will be propagated
444	// to it from the entry basic block sooner or later. Until then, it is both
445	// more efficient and easier to reason about to skip computeNext().
446	if (Cur.empty()) {
447	LLVM_DEBUG(
448	{ dbgs() << "Skipping computeNext(Point, Cur) as Cur is empty.\n"; });
449	return SrcState ();
450	}
451
452	// First, compute various properties of the instruction, taking the state
453	// before its execution into account, if necessary.
454
455	BitVector Clobbered = getClobberedRegs(Point);
456	SmallVector<MCPhysReg> NewSafeToDerefRegs =
457	getRegsMadeSafeToDeref(Point, Cur);
458	SmallVector<MCPhysReg> NewTrustedRegs = getRegsMadeTrusted(Point, Cur);
459
460	// Ideally, being trusted is a strictly stronger property than being
461	// safe-to-dereference. To simplify the computation of Next state, enforce
462	// this for NewSafeToDerefRegs and NewTrustedRegs. Additionally, this
463	// fixes the properly for "cumulative" register states in tricky cases
464	// like the following:
465	//
466	// ; LR is safe to dereference here
467	// mov x16, x30 ; start of the sequence, LR is s-t-d right before
468	// xpaclri ; clobbers LR, LR is not safe anymore
469	// cmp x30, x16
470	// b.eq 1f ; end of the sequence: LR is marked as trusted
471	// brk 0x1234
472	// 1:
473	// ; at this point LR would be marked as trusted,
474	// ; but not safe-to-dereference
475	//
476	for (auto TrustedReg : NewTrustedRegs) {
477	if (!is_contained(Range&: NewSafeToDerefRegs, Element: TrustedReg))
478	NewSafeToDerefRegs.push_back(Elt: TrustedReg);
479	}
480
481	// Then, compute the state after this instruction is executed.
482	SrcState Next = Cur;
483
484	Next.SafeToDerefRegs.reset(RHS: Clobbered);
485	Next.TrustedRegs.reset(RHS: Clobbered);
486	// Keep track of this instruction if it writes to any of the registers we
487	// need to track that for:
488	for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters())
489	if (Clobbered [Reg])
490	lastWritingInsts(S&: Next, Reg) = {&Point};
491
492	// After accounting for clobbered registers in general, override the state
493	// according to authentication and other special cases* of clobbering.*
494
495	// The sub-registers are also safe-to-dereference now, but not their
496	// super-registers (as they retain untrusted register units).
497	BitVector NewSafeSubregs(NumRegs);
498	for (MCPhysReg SafeReg : NewSafeToDerefRegs)
499	NewSafeSubregs \|= BC.MIB ->getAliases(Reg: SafeReg, /OnlySmaller=/true);
500	for (MCPhysReg Reg : NewSafeSubregs.set_bits()) {
501	Next.SafeToDerefRegs.set(Reg);
502	if (RegsToTrackInstsFor.isTracked(Reg))
503	lastWritingInsts(S&: Next, Reg).clear();
504	}
505
506	// Process new trusted registers.
507	for (MCPhysReg TrustedReg : NewTrustedRegs)
508	Next.TrustedRegs \|= BC.MIB ->getAliases(Reg: TrustedReg, /OnlySmaller=/true);
509
510	LLVM_DEBUG({
511	dbgs() << " .. result: (";
512	P.print(dbgs(), Next);
513	dbgs() << ")\n";
514	});
515
516	return Next;
517	}
518
519	public:
520	std::vector<MCInstReference>
521	getLastClobberingInsts(const MCInst &Inst, BinaryFunction &BF,
522	MCPhysReg ClobberedReg) const {
523	const SrcState &S = getStateBefore(Inst);
524
525	std::vector<MCInstReference> Result;
526	for (const MCInst *Inst : lastWritingInsts(S, Reg: ClobberedReg)) {
527	MCInstReference Ref = MCInstReference::get(Inst, BF);
528	assert(Ref && "Expected Inst to be found");
529	Result.push_back(x: Ref);
530	}
531	return Result;
532	}
533	};
534
535	class DataflowSrcSafetyAnalysis
536	: public SrcSafetyAnalysis,
537	public DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState,
538	/Backward=/false, SrcStatePrinter> {
539	using DFParent = DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState, false,
540	SrcStatePrinter>;
541	friend DFParent;
542
543	using SrcSafetyAnalysis::BC;
544	using SrcSafetyAnalysis::computeNext;
545
546	public:
547	DataflowSrcSafetyAnalysis(BinaryFunction &BF,
548	MCPlusBuilder::AllocatorIdTy AllocId,
549	ArrayRef<MCPhysReg> RegsToTrackInstsFor)
550	: SrcSafetyAnalysis (BF, RegsToTrackInstsFor), DFParent (BF, AllocId) {}
551
552	const SrcState &getStateBefore(const MCInst &Inst) const override {
553	return DFParent::getStateBefore(Point: Inst).get();
554	}
555
556	void run() override {
557	for (BinaryBasicBlock &BB : Func) {
558	if (auto CheckerInfo = BC.MIB ->getAuthCheckedReg(BB)) {
559	MCPhysReg CheckedReg = CheckerInfo ->first;
560	MCInst &FirstInst = *CheckerInfo ->second;
561	MCInst &LastInst = *BB.getLastNonPseudoInstr();
562	LLVM_DEBUG({
563	dbgs() << "Found pointer checking sequence in " << BB.getName()
564	<< ":\n";
565	traceReg(BC, "Checked register", CheckedReg);
566	traceInst(BC, "First instruction", FirstInst);
567	traceInst(BC, "Last instruction", LastInst);
568	});
569	(void)CheckedReg;
570	(void)FirstInst;
571	assert(llvm::any_of(BB, [&](MCInst &I) { return &I == &FirstInst; }) &&
572	"Data-flow analysis expects the checker not to cross BBs");
573	CheckerSequenceInfo [&LastInst] = *CheckerInfo;
574	}
575	}
576	DFParent::run();
577	}
578
579	protected:
580	void preflight() {}
581
582	SrcState getStartingStateAtBB(const BinaryBasicBlock &BB) {
583	if (BB.isEntryPoint())
584	return createEntryState();
585
586	return SrcState ();
587	}
588
589	SrcState getStartingStateAtPoint(const MCInst &Point) { return SrcState (); }
590
591	void doConfluence(SrcState &StateOut, const SrcState &StateIn) {
592	SrcStatePrinter P(BC);
593	LLVM_DEBUG({
594	dbgs() << " DataflowSrcSafetyAnalysis::Confluence(\n";
595	dbgs() << " State 1: ";
596	P.print(dbgs(), StateOut);
597	dbgs() << "\n";
598	dbgs() << " State 2: ";
599	P.print(dbgs(), StateIn);
600	dbgs() << ")\n";
601	});
602
603	StateOut.merge(StateIn);
604
605	LLVM_DEBUG({
606	dbgs() << " merged state: ";
607	P.print(dbgs(), StateOut);
608	dbgs() << "\n";
609	});
610	}
611
612	StringRef getAnnotationName() const { return "DataflowSrcSafetyAnalysis"; }
613	};
614
615	// A simplified implementation of DataflowSrcSafetyAnalysis for functions
616	// lacking CFG information.
617	//
618	// Let assume the instructions can only be executed linearly unless there is
619	// a label to jump to - this should handle both directly jumping to a location
620	// encoded as an immediate operand of a branch instruction, as well as saving a
621	// branch destination somewhere and passing it to an indirect branch instruction
622	// later, provided no arithmetic is performed on the destination address:
623	//
624	// ; good: the destination is directly encoded into the branch instruction
625	// cbz x0, some_label
626	//
627	// ; good: the branch destination is first stored and then used as-is
628	// adr x1, some_label
629	// br x1
630	//
631	// ; bad: some clever arithmetic is performed manually
632	// adr x1, some_label
633	// add x1, x1, #4
634	// br x1
635	// ...
636	// some_label:
637	// ; pessimistically reset the state as we are unsure where we came from
638	// ...
639	// ret
640	// JTI0:
641	// .byte some_label - Ltmp0 ; computing offsets using labels may probably
642	// work too, provided enough information is
643	// retained by the assembler and linker
644	//
645	// Then, a function can be split into a number of disjoint contiguous sequences
646	// of instructions without labels in between. These sequences can be processed
647	// the same way basic blocks are processed by data-flow analysis, assuming
648	// pessimistically that all registers are unsafe at the start of each sequence.
649	class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
650	BinaryFunction &BF;
651	MCPlusBuilder::AllocatorIdTy AllocId;
652	unsigned StateAnnotationIndex;
653
654	void cleanStateAnnotations() {
655	for (auto &I : BF.instrs())
656	BC.MIB ->removeAnnotation(Inst&: I.second, Index: StateAnnotationIndex);
657	}
658
659	/// Creates a state with all registers marked unsafe (not to be confused
660	/// with empty state).
661	SrcState createUnsafeState() const {
662	return SrcState (NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
663	}
664
665	public:
666	CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
667	MCPlusBuilder::AllocatorIdTy AllocId,
668	ArrayRef<MCPhysReg> RegsToTrackInstsFor)
669	: SrcSafetyAnalysis (BF, RegsToTrackInstsFor), BF(BF), AllocId(AllocId) {
670	StateAnnotationIndex =
671	BC.MIB ->getOrCreateAnnotationIndex(Name: "CFGUnawareSrcSafetyAnalysis");
672	}
673
674	void run() override {
675	SrcState S = createEntryState();
676	for (auto &I : BF.instrs()) {
677	MCInst &Inst = I.second;
678
679	// If there is a label before this instruction, it is possible that it
680	// can be jumped-to, thus conservatively resetting S. As an exception,
681	// let's ignore any labels at the beginning of the function, as at least
682	// one label is expected there.
683	if (BF.hasLabelAt(Offset: I.first) && &Inst != &BF.instrs().begin()->second) {
684	LLVM_DEBUG({
685	traceInst(BC, "Due to label, resetting the state before", Inst);
686	});
687	S = createUnsafeState();
688	}
689
690	// Check if we need to remove an old annotation (this is the case if
691	// this is the second, detailed, run of the analysis).
692	if (BC.MIB ->hasAnnotation(Inst, Index: StateAnnotationIndex))
693	BC.MIB ->removeAnnotation(Inst, Index: StateAnnotationIndex);
694	// Attach the state before* this instruction executes.*
695	BC.MIB ->addAnnotation(Inst, Index: StateAnnotationIndex, Val: S, AllocatorId: AllocId);
696
697	// Compute the state after this instruction executes.
698	S = computeNext(Point: Inst, Cur: S);
699	}
700	}
701
702	const SrcState &getStateBefore(const MCInst &Inst) const override {
703	return BC.MIB ->getAnnotationAs<SrcState>(Inst, Index: StateAnnotationIndex);
704	}
705
706	~CFGUnawareSrcSafetyAnalysis() { cleanStateAnnotations(); }
707	};
708
709	std::shared_ptr<SrcSafetyAnalysis>
710	SrcSafetyAnalysis::create(BinaryFunction &BF,
711	MCPlusBuilder::AllocatorIdTy AllocId,
712	ArrayRef<MCPhysReg> RegsToTrackInstsFor) {
713	if (BF.hasCFG())
714	return std::make_shared<DataflowSrcSafetyAnalysis>(args&: BF, args&: AllocId,
715	args&: RegsToTrackInstsFor);
716	return std::make_shared<CFGUnawareSrcSafetyAnalysis>(args&: BF, args&: AllocId,
717	args&: RegsToTrackInstsFor);
718	}
719
720	// This function could return PartialReport<T>, but currently T is always
721	// MCPhysReg, even though it is an implementation detail.
722	static PartialReport<MCPhysReg> make_generic_report(MCInstReference Location,
723	StringRef Text) {
724	auto Report = std::make_shared<GenericDiagnostic>(args&: Location, args&: Text);
725	return PartialReport<MCPhysReg>(Report, std::nullopt);
726	}
727
728	template <typename T>
729	static PartialReport<T> make_gadget_report(const GadgetKind &Kind,
730	MCInstReference Location,
731	T RequestedDetails) {
732	auto Report = std::make_shared<GadgetDiagnostic>(args: Kind, args&: Location);
733	return PartialReport<T>(Report, RequestedDetails);
734	}
735
736	static std::optional<PartialReport<MCPhysReg>>
737	shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
738	const SrcState &S) {
739	static const GadgetKind RetKind("non-protected ret found");
740	if (!BC.MIB ->isReturn(Inst))
741	return std::nullopt;
742
743	bool IsAuthenticated = false;
744	std::optional<MCPhysReg> RetReg =
745	BC.MIB ->getRegUsedAsRetDest(Inst, IsAuthenticatedInternally&: IsAuthenticated);
746	if (!RetReg) {
747	return make_generic_report(
748	Location: Inst, Text: "Warning: pac-ret analysis could not analyze this return "
749	"instruction");
750	}
751	if (IsAuthenticated)
752	return std::nullopt;
753
754	LLVM_DEBUG({
755	traceInst(BC, "Found RET inst", Inst);
756	traceReg(BC, "RetReg", *RetReg);
757	traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs);
758	});
759
760	if (S.SafeToDerefRegs [*RetReg])
761	return std::nullopt;
762
763	return make_gadget_report(Kind: RetKind, Location: Inst, RequestedDetails: *RetReg);
764	}
765
766	static std::optional<PartialReport<MCPhysReg>>
767	shouldReportCallGadget(const BinaryContext &BC, const MCInstReference &Inst,
768	const SrcState &S) {
769	static const GadgetKind CallKind("non-protected call found");
770	if (!BC.MIB ->isIndirectCall(Inst) && !BC.MIB ->isIndirectBranch(Inst))
771	return std::nullopt;
772
773	bool IsAuthenticated = false;
774	MCPhysReg DestReg =
775	BC.MIB ->getRegUsedAsIndirectBranchDest(Inst, IsAuthenticatedInternally&: IsAuthenticated);
776	if (IsAuthenticated)
777	return std::nullopt;
778
779	assert(DestReg != BC.MIB ->getNoRegister() && "Valid register expected");
780	LLVM_DEBUG({
781	traceInst(BC, "Found call inst", Inst);
782	traceReg(BC, "Call destination reg", DestReg);
783	traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs);
784	});
785	if (S.SafeToDerefRegs [DestReg])
786	return std::nullopt;
787
788	return make_gadget_report(Kind: CallKind, Location: Inst, RequestedDetails: DestReg);
789	}
790
791	static std::optional<PartialReport<MCPhysReg>>
792	shouldReportSigningOracle(const BinaryContext &BC, const MCInstReference &Inst,
793	const SrcState &S) {
794	static const GadgetKind SigningOracleKind("signing oracle found");
795
796	std::optional<MCPhysReg> SignedReg = BC.MIB ->getSignedReg(Inst);
797	if (!SignedReg)
798	return std::nullopt;
799
800	LLVM_DEBUG({
801	traceInst(BC, "Found sign inst", Inst);
802	traceReg(BC, "Signed reg", *SignedReg);
803	traceRegMask(BC, "TrustedRegs", S.TrustedRegs);
804	});
805	if (S.TrustedRegs [*SignedReg])
806	return std::nullopt;
807
808	return make_gadget_report(Kind: SigningOracleKind, Location: Inst, RequestedDetails: *SignedReg);
809	}
810
811	template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
812	if (BF.hasCFG()) {
813	for (BinaryBasicBlock &BB : BF)
814	for (int64_t I = `0`, E = BB.size(); I < E; ++I)
815	Fn(MCInstInBBReference (&BB, I));
816	} else {
817	for (auto I : BF.instrs())
818	Fn(MCInstInBFReference (&BF, I.first));
819	}
820	}
821
822	static SmallVector<MCPhysReg>
823	collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) {
824	SmallSet<MCPhysReg, `4`> RegsToTrack;
825	for (auto Report : Reports)
826	if (Report.RequestedDetails)
827	RegsToTrack.insert(V: *Report.RequestedDetails);
828
829	return SmallVector<MCPhysReg>(RegsToTrack.begin(), RegsToTrack.end());
830	}
831
832	void FunctionAnalysisContext::findUnsafeUses(
833	SmallVector<PartialReport<MCPhysReg>> &Reports) {
834	auto Analysis = SrcSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: {});
835	LLVM_DEBUG({ dbgs() << "Running src register safety analysis...\n"; });
836	Analysis ->run();
837	LLVM_DEBUG({
838	dbgs() << "After src register safety analysis:\n";
839	BF.dump();
840	});
841
842	iterateOverInstrs(BF, Fn: [&](MCInstReference Inst) {
843	const SrcState &S = Analysis ->getStateBefore(Inst);
844
845	// If non-empty state was never propagated from the entry basic block
846	// to Inst, assume it to be unreachable and report a warning.
847	if (S.empty()) {
848	Reports.push_back(
849	Elt: make_generic_report(Location: Inst, Text: "Warning: unreachable instruction found"));
850	return;
851	}
852
853	if (auto Report = shouldReportReturnGadget(BC, Inst, S))
854	Reports.push_back(Elt: *Report);
855
856	if (PacRetGadgetsOnly)
857	return;
858
859	if (auto Report = shouldReportCallGadget(BC, Inst, S))
860	Reports.push_back(Elt: *Report);
861	if (auto Report = shouldReportSigningOracle(BC, Inst, S))
862	Reports.push_back(Elt: *Report);
863	});
864	}
865
866	void FunctionAnalysisContext::augmentUnsafeUseReports(
867	ArrayRef<PartialReport<MCPhysReg>> Reports) {
868	SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports);
869	// Re-compute the analysis with register tracking.
870	auto Analysis = SrcSafetyAnalysis::create(BF, AllocId: AllocatorId, RegsToTrackInstsFor: RegsToTrack);
871	LLVM_DEBUG(
872	{ dbgs() << "\nRunning detailed src register safety analysis...\n"; });
873	Analysis ->run();
874	LLVM_DEBUG({
875	dbgs() << "After detailed src register safety analysis:\n";
876	BF.dump();
877	});
878
879	// Augment gadget reports.
880	for (auto &Report : Reports) {
881	MCInstReference Location = Report.Issue ->Location;
882	LLVM_DEBUG({ traceInst(BC, "Attaching clobbering info to", Location); });
883	assert(Report.RequestedDetails &&
884	"Should be removed by handleSimpleReports");
885	auto DetailedInfo =
886	std::make_shared<ClobberingInfo>(args: Analysis ->getLastClobberingInsts(
887	Inst: Location, BF, ClobberedReg: *Report.RequestedDetails));
888	Result.Diagnostics.emplace_back(args: Report.Issue, args&: DetailedInfo);
889	}
890	}
891
892	void FunctionAnalysisContext::handleSimpleReports(
893	SmallVector<PartialReport<MCPhysReg>> &Reports) {
894	// Before re-running the detailed analysis, process the reports which do not
895	// need any additional details to be attached.
896	for (auto &Report : Reports) {
897	if (!Report.RequestedDetails)
898	Result.Diagnostics.emplace_back(args&: Report.Issue, args: nullptr);
899	}
900	llvm::erase_if(C&: Reports, P: [](const auto &R) { return !R.RequestedDetails; });
901	}
902
903	void FunctionAnalysisContext::run() {
904	LLVM_DEBUG({
905	dbgs() << "Analyzing function " << BF.getPrintName()
906	<< ", AllocatorId = " << AllocatorId << "\n";
907	BF.dump();
908	});
909
910	SmallVector<PartialReport<MCPhysReg>> UnsafeUses;
911	findUnsafeUses(Reports&: UnsafeUses);
912	handleSimpleReports(Reports&: UnsafeUses);
913	if (!UnsafeUses.empty())
914	augmentUnsafeUseReports(Reports: UnsafeUses);
915	}
916
917	void Analysis::runOnFunction(BinaryFunction &BF,
918	MCPlusBuilder::AllocatorIdTy AllocatorId) {
919	FunctionAnalysisContext FA(BF, AllocatorId, PacRetGadgetsOnly);
920	FA.run();
921
922	const FunctionAnalysisResult &FAR = FA.getResult();
923	if (FAR.Diagnostics.empty())
924	return;
925
926	// `runOnFunction` is typically getting called from multiple threads in
927	// parallel. Therefore, use a lock to avoid data races when storing the
928	// result of the analysis in the `AnalysisResults` map.
929	{
930	std::lock_guard<std::mutex> Lock(AnalysisResultsMutex);
931	AnalysisResults [&BF] = FAR;
932	}
933	}
934
935	static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
936	size_t StartIndex = `0`, size_t EndIndex = -`1`) {
937	if (EndIndex == (size_t)-`1`)
938	EndIndex = BB->size() - `1`;
939	const BinaryFunction *BF = BB->getFunction();
940	for (unsigned I = StartIndex; I <= EndIndex; ++I) {
941	// FIXME: this assumes all instructions are 4 bytes in size. This is true
942	// for AArch64, but it might be good to extract this function so it can be
943	// used elsewhere and for other targets too.
944	uint64_t Address = BB->getOffset() + BF->getAddress() + `4` * I;
945	const MCInst &Inst = BB->getInstructionAtIndex(Index: I);
946	if (BC.MIB ->isCFI(Inst))
947	continue;
948	BC.printInstruction(OS&: outs(), Instruction: Inst, Offset: Address, Function: BF);
949	}
950	}
951
952	static void reportFoundGadgetInSingleBBSingleRelatedInst(
953	raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst,
954	const MCInstReference Location) {
955	BinaryBasicBlock *BB = Location.getBasicBlock();
956	assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent);
957	assert(Location.ParentKind == MCInstReference::BasicBlockParent);
958	MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef;
959	if (BB == RelatedInstBB.BB) {
960	OS << " This happens in the following basic block:\n";
961	printBB(BC, BB);
962	}
963	}
964
965	void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
966	StringRef IssueKind) const {
967	BinaryFunction *BF = Location.getFunction();
968	BinaryBasicBlock *BB = Location.getBasicBlock();
969
970	OS << "\nGS-PAUTH: " << IssueKind;
971	OS << " in function " << BF->getPrintName();
972	if (BB)
973	OS << ", basic block " << BB->getName();
974	OS << ", at address " << llvm::format(Fmt: "%x", Vals: Location.getAddress()) << "\n";
975	OS << " The instruction is ";
976	BC.printInstruction(OS, Instruction: Location, Offset: Location.getAddress(), Function: BF);
977	}
978
979	void GadgetDiagnostic::generateReport(raw_ostream &OS,
980	const BinaryContext &BC) const {
981	printBasicInfo(OS, BC, IssueKind: Kind.getDescription());
982	}
983
984	static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
985	ArrayRef<MCInstReference> RelatedInstrs) {
986	const BinaryFunction &BF = *Location.getFunction();
987	const BinaryContext &BC = BF.getBinaryContext();
988
989	// Sort by address to ensure output is deterministic.
990	SmallVector<MCInstReference> RI(RelatedInstrs);
991	llvm::sort(C&: RI, Comp: [](const MCInstReference &A, const MCInstReference &B) {
992	return A.getAddress() < B.getAddress();
993	});
994	for (unsigned I = `0`; I < RI.size(); ++I) {
995	MCInstReference InstRef = RI [I];
996	OS << " " << (I + `1`) << ". ";
997	BC.printInstruction(OS, Instruction: InstRef, Offset: InstRef.getAddress(), Function: &BF);
998	};
999	if (RelatedInstrs.size() == `1`) {
1000	const MCInstReference RelatedInst = RelatedInstrs [`0`];
1001	// Printing the details for the MCInstReference::FunctionParent case
1002	// is not implemented not to overcomplicate the code, as most functions
1003	// are expected to have CFG information.
1004	if (RelatedInst.ParentKind == MCInstReference::BasicBlockParent)
1005	reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst,
1006	Location);
1007	}
1008	}
1009
1010	void ClobberingInfo::print(raw_ostream &OS,
1011	const MCInstReference Location) const {
1012	OS << " The " << ClobberingInstrs.size()
1013	<< " instructions that write to the affected registers after any "
1014	"authentication are:\n";
1015	printRelatedInstrs(OS, Location, RelatedInstrs: ClobberingInstrs);
1016	}
1017
1018	void GenericDiagnostic::generateReport(raw_ostream &OS,
1019	const BinaryContext &BC) const {
1020	printBasicInfo(OS, BC, IssueKind: Text);
1021	}
1022
1023	Error Analysis::runOnFunctions(BinaryContext &BC) {
1024	ParallelUtilities::WorkFuncWithAllocTy WorkFun =
1025	[&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) {
1026	runOnFunction(BF, AllocatorId);
1027	};
1028
1029	ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
1030	return false;
1031	};
1032
1033	ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
1034	BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFunction: WorkFun,
1035	SkipPredicate: SkipFunc, LogName: "PAuthGadgetScanner");
1036
1037	for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
1038	if (!AnalysisResults.count(x: BF))
1039	continue;
1040	for (const FinalReport &R : AnalysisResults [BF].Diagnostics) {
1041	R.Issue ->generateReport(OS&: outs(), BC);
1042	if (R.Details)
1043	R.Details ->print(OS&: outs(), Location: R.Issue ->Location);
1044	}
1045	}
1046	return Error::success();
1047	}
1048
1049	} // namespace PAuthGadgetScanner
1050	} // namespace bolt
1051	} // namespace llvm
1052

Provided by KDAB

Definitions

operator<<
operator<<
operator<<
traceInst
traceReg
traceRegMask
TrackedRegisters
NoIndex
getMappingSize
TrackedRegisters
getRegisters
getNumTrackedRegisters
empty
isTracked
getIndex
SrcState
SrcState
SrcState
merge
empty
operator==
operator!=
printLastInsts
operator<<
SrcStatePrinter
SrcStatePrinter
print
SrcSafetyAnalysis
SrcSafetyAnalysis
~SrcSafetyAnalysis
lastWritingInsts
lastWritingInsts
createEntryState
getClobberedRegs
getRegsMadeSafeToDeref
getRegsMadeTrusted
computeNext
getLastClobberingInsts
DataflowSrcSafetyAnalysis
DataflowSrcSafetyAnalysis
getStateBefore
run
preflight
getStartingStateAtBB
getStartingStateAtPoint
doConfluence
getAnnotationName
CFGUnawareSrcSafetyAnalysis
cleanStateAnnotations
createUnsafeState
CFGUnawareSrcSafetyAnalysis
run
getStateBefore
~CFGUnawareSrcSafetyAnalysis
create
make_generic_report
make_gadget_report
shouldReportReturnGadget
shouldReportCallGadget
shouldReportSigningOracle
iterateOverInstrs
collectRegsToTrack
findUnsafeUses
augmentUnsafeUseReports
handleSimpleReports
run
runOnFunction
printBB
reportFoundGadgetInSingleBBSingleRelatedInst
printBasicInfo
generateReport
printRelatedInstrs
print
generateReport

Update your C++ knowledge – Modern C++11/14/17 Training

Find out more

Definitions

source code of bolt/lib/Passes/PAuthGadgetScanner.cpp