1//===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements functions for handling C++ exception meta data.
10//
11// Some of the code is taken from examples/ExceptionDemo
12//
13//===----------------------------------------------------------------------===//
14
15#include "bolt/Core/Exceptions.h"
16#include "bolt/Core/BinaryFunction.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/Twine.h"
19#include "llvm/BinaryFormat/Dwarf.h"
20#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21#include "llvm/Support/Casting.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/Errc.h"
25#include "llvm/Support/LEB128.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28#include <map>
29
30#undef DEBUG_TYPE
31#define DEBUG_TYPE "bolt-exceptions"
32
33using namespace llvm::dwarf;
34
35namespace opts {
36
37extern llvm::cl::OptionCategory BoltCategory;
38
39extern llvm::cl::opt<unsigned> Verbosity;
40
41static llvm::cl::opt<bool>
42 PrintExceptions("print-exceptions",
43 llvm::cl::desc("print exception handling data"),
44 llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
45
46} // namespace opts
47
48namespace llvm {
49namespace bolt {
50
// Read and dump the .gcc_except_table section entry.
52//
53// .gcc_except_table section contains a set of Language-Specific Data Areas -
54// a fancy name for exception handling tables. There's one LSDA entry per
55// function. However, we can't actually tell which function LSDA refers to
56// unless we parse .eh_frame entry that refers to the LSDA.
57// Then inside LSDA most addresses are encoded relative to the function start,
58// so we need the function context in order to get to real addresses.
59//
60// The best visual representation of the tables comprising LSDA and
61// relationships between them is illustrated at:
62// https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63// Keep in mind that GCC implementation deviates slightly from that document.
64//
65// To summarize, there are 4 tables in LSDA: call site table, actions table,
66// types table, and types index table (for indirection). The main table contains
67// call site entries. Each call site includes a PC range that can throw an
68// exception, a handler (landing pad), and a reference to an entry in the action
69// table. The handler and/or action could be 0. The action entry is a head
70// of a list of actions associated with a call site. The action table contains
71// all such lists (it could be optimized to share list tails). Each action could
72// be either to catch an exception of a given type, to perform a cleanup, or to
73// propagate the exception after filtering it out (e.g. to make sure function
74// exception specification is not violated). Catch action contains a reference
75// to an entry in the type table, and filter action refers to an entry in the
76// type index table to encode a set of types to filter.
77//
78// Call site table follows LSDA header. Action table immediately follows the
79// call site table.
80//
81// Both types table and type index table start at the same location, but they
82// grow in opposite directions (types go up, indices go down). The beginning of
83// these tables is encoded in LSDA header. Sizes for both of the tables are not
84// included anywhere.
85//
86// We have to parse all of the tables to determine their sizes. Then we have
87// to parse the call site table and associate discovered information with
88// actual call instructions and landing pad blocks.
89//
90// For the purpose of rewriting exception handling tables, we can reuse action,
91// and type index tables in their original binary format.
92//
93// Type table could be encoded using position-independent references, and thus
94// may require relocation.
95//
// Ideally we should be able to re-write LSDA in-place, without the need to
// allocate a new space for it. Sadly, there's no guarantee that the new call
// site table will be the same size, because GCC uses uleb encodings for PC
// offsets.
99//
100// Note: some functions have LSDA entries with 0 call site entries.
101Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102 uint64_t LSDASectionAddress) {
103 assert(CurrentState == State::Disassembled && "unexpected function state");
104
105 if (!getLSDAAddress())
106 return Error::success();
107
108 DWARFDataExtractor Data(
109 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110 LSDASectionData.size()),
111 BC.DwCtx->getDWARFObj().isLittleEndian(),
112 BC.DwCtx->getDWARFObj().getAddressSize());
113 uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
114 assert(Data.isValidOffset(Offset) && "wrong LSDA address");
115
116 const uint8_t LPStartEncoding = Data.getU8(offset_ptr: &Offset);
117 uint64_t LPStart = Address;
118 if (LPStartEncoding != dwarf::DW_EH_PE_omit) {
119 std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
120 Offset: &Offset, Encoding: LPStartEncoding, AbsPosOffset: Offset + LSDASectionAddress);
121 if (!MaybeLPStart) {
122 BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: "
123 << (unsigned)LPStartEncoding << '\n';
124 return createFatalBOLTError(S: "");
125 }
126 LPStart = *MaybeLPStart;
127 }
128
129 const uint8_t TTypeEncoding = Data.getU8(offset_ptr: &Offset);
130 LSDATypeEncoding = TTypeEncoding;
131 size_t TTypeEncodingSize = 0;
132 uintptr_t TTypeEnd = 0;
133 if (TTypeEncoding != DW_EH_PE_omit) {
134 TTypeEnd = Data.getULEB128(offset_ptr: &Offset);
135 TTypeEncodingSize = BC.getDWARFEncodingSize(Encoding: TTypeEncoding);
136 }
137
138 if (opts::PrintExceptions) {
139 BC.outs() << "[LSDA at 0x" << Twine::utohexstr(Val: getLSDAAddress())
140 << " for function " << *this << "]:\n";
141 BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(Val: LPStartEncoding)
142 << '\n';
143 BC.outs() << "LPStart = 0x" << Twine::utohexstr(Val: LPStart) << '\n';
144 BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(Val: TTypeEncoding)
145 << '\n';
146 BC.outs() << "TType End = " << TTypeEnd << '\n';
147 }
148
149 // Table to store list of indices in type table. Entries are uleb128 values.
150 const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
151
152 // Offset past the last decoded index.
153 uint64_t MaxTypeIndexTableOffset = 0;
154
155 // Max positive index used in type table.
156 unsigned MaxTypeIndex = 0;
157
158 // The actual type info table starts at the same location, but grows in
159 // opposite direction. TTypeEncoding is used to encode stored values.
160 const uint64_t TypeTableStart = Offset + TTypeEnd;
161
162 uint8_t CallSiteEncoding = Data.getU8(offset_ptr: &Offset);
163 uint32_t CallSiteTableLength = Data.getULEB128(offset_ptr: &Offset);
164 uint64_t CallSiteTableStart = Offset;
165 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
166 uint64_t CallSitePtr = CallSiteTableStart;
167 uint64_t ActionTableStart = CallSiteTableEnd;
168
169 if (opts::PrintExceptions) {
170 BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
171 BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n';
172 BC.outs() << '\n';
173 }
174
175 this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
176 const uint64_t RangeBase = getAddress();
177 while (CallSitePtr < CallSiteTableEnd) {
178 uint64_t Start = *Data.getEncodedPointer(Offset: &CallSitePtr, Encoding: CallSiteEncoding,
179 AbsPosOffset: CallSitePtr + LSDASectionAddress);
180 uint64_t Length = *Data.getEncodedPointer(Offset: &CallSitePtr, Encoding: CallSiteEncoding,
181 AbsPosOffset: CallSitePtr + LSDASectionAddress);
182 uint64_t LandingPad = *Data.getEncodedPointer(
183 Offset: &CallSitePtr, Encoding: CallSiteEncoding, AbsPosOffset: CallSitePtr + LSDASectionAddress);
184 uint64_t ActionEntry = Data.getULEB128(offset_ptr: &CallSitePtr);
185 if (LandingPad)
186 LandingPad += LPStart;
187
188 if (opts::PrintExceptions) {
189 BC.outs() << "Call Site: [0x" << Twine::utohexstr(Val: RangeBase + Start)
190 << ", 0x" << Twine::utohexstr(Val: RangeBase + Start + Length)
191 << "); landing pad: 0x" << Twine::utohexstr(Val: LandingPad)
192 << "; action entry: 0x" << Twine::utohexstr(Val: ActionEntry)
193 << "\n";
194 BC.outs() << " current offset is " << (CallSitePtr - CallSiteTableStart)
195 << '\n';
196 }
197
198 // Create a handler entry if necessary.
199 MCSymbol *LPSymbol = nullptr;
200 if (LandingPad) {
201 // Verify if landing pad code is located outside current function
202 // Support landing pad to builtin_unreachable
203 if (LandingPad < Address || LandingPad > Address + getSize()) {
204 BinaryFunction *Fragment =
205 BC.getBinaryFunctionContainingAddress(Address: LandingPad);
206 assert(Fragment != nullptr &&
207 "BOLT-ERROR: cannot find landing pad fragment");
208 BC.addInterproceduralReference(Function: this, Address: Fragment->getAddress());
209 BC.processInterproceduralReferences();
210 assert(isParentOrChildOf(*Fragment) &&
211 "BOLT-ERROR: cannot have landing pads in different functions");
212 setHasIndirectTargetToSplitFragment(true);
213 BC.addFragmentsToSkip(Function: this);
214 return Error::success();
215 }
216
217 const uint64_t LPOffset = LandingPad - getAddress();
218 if (!getInstructionAtOffset(Offset: LPOffset)) {
219 if (opts::Verbosity >= 1)
220 BC.errs() << "BOLT-WARNING: landing pad "
221 << Twine::utohexstr(Val: LPOffset)
222 << " not pointing to an instruction in function " << *this
223 << " - ignoring.\n";
224 } else {
225 auto Label = Labels.find(x: LPOffset);
226 if (Label != Labels.end()) {
227 LPSymbol = Label->second;
228 } else {
229 LPSymbol = BC.Ctx->createNamedTempSymbol(Name: "LP");
230 Labels[LPOffset] = LPSymbol;
231 }
232 }
233 }
234
235 // Mark all call instructions in the range.
236 auto II = Instructions.find(x: Start);
237 auto IE = Instructions.end();
238 assert(II != IE && "exception range not pointing to an instruction");
239 do {
240 MCInst &Instruction = II->second;
241 if (BC.MIB->isCall(Inst: Instruction) &&
242 !BC.MIB->getConditionalTailCall(Inst: Instruction)) {
243 assert(!BC.MIB->isInvoke(Instruction) &&
244 "overlapping exception ranges detected");
245 // Add extra operands to a call instruction making it an invoke from
246 // now on.
247 BC.MIB->addEHInfo(Inst&: Instruction,
248 LP: MCPlus::MCLandingPad(LPSymbol, ActionEntry));
249 }
250 ++II;
251 } while (II != IE && II->first < Start + Length);
252
253 if (ActionEntry != 0) {
254 auto printType = [&](int Index, raw_ostream &OS) {
255 assert(Index > 0 && "only positive indices are valid");
256 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
257 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
258 uint64_t TypeAddress =
259 *Data.getEncodedPointer(Offset: &TTEntry, Encoding: TTypeEncoding, AbsPosOffset: TTEntryAddress);
260 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
261 TypeAddress = 0;
262 if (TypeAddress == 0) {
263 OS << "<all>";
264 return;
265 }
266 if (TTypeEncoding & DW_EH_PE_indirect) {
267 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(Address: TypeAddress);
268 assert(PointerOrErr && "failed to decode indirect address");
269 TypeAddress = *PointerOrErr;
270 }
271 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(Address: TypeAddress))
272 OS << TypeSymBD->getName();
273 else
274 OS << "0x" << Twine::utohexstr(Val: TypeAddress);
275 };
276 if (opts::PrintExceptions)
277 BC.outs() << " actions: ";
278 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
279 int64_t ActionType;
280 int64_t ActionNext;
281 const char *Sep = "";
282 do {
283 ActionType = Data.getSLEB128(OffsetPtr: &ActionPtr);
284 const uint32_t Self = ActionPtr;
285 ActionNext = Data.getSLEB128(OffsetPtr: &ActionPtr);
286 if (opts::PrintExceptions)
287 BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
288 if (ActionType == 0) {
289 if (opts::PrintExceptions)
290 BC.outs() << "cleanup";
291 } else if (ActionType > 0) {
292 // It's an index into a type table.
293 MaxTypeIndex =
294 std::max(a: MaxTypeIndex, b: static_cast<unsigned>(ActionType));
295 if (opts::PrintExceptions) {
296 BC.outs() << "catch type ";
297 printType(ActionType, BC.outs());
298 }
299 } else { // ActionType < 0
300 if (opts::PrintExceptions)
301 BC.outs() << "filter exception types ";
302 const char *TSep = "";
303 // ActionType is a negative *byte* offset into *uleb128-encoded* table
304 // of indices with base 1.
305 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
306 // encoded using uleb128 thus we cannot directly dereference them.
307 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
308 while (uint64_t Index = Data.getULEB128(offset_ptr: &TypeIndexTablePtr)) {
309 MaxTypeIndex = std::max(a: MaxTypeIndex, b: static_cast<unsigned>(Index));
310 if (opts::PrintExceptions) {
311 BC.outs() << TSep;
312 printType(Index, BC.outs());
313 TSep = ", ";
314 }
315 }
316 MaxTypeIndexTableOffset = std::max(
317 a: MaxTypeIndexTableOffset, b: TypeIndexTablePtr - TypeIndexTableStart);
318 }
319
320 Sep = "; ";
321
322 ActionPtr = Self + ActionNext;
323 } while (ActionNext);
324 if (opts::PrintExceptions)
325 BC.outs() << '\n';
326 }
327 }
328 if (opts::PrintExceptions)
329 BC.outs() << '\n';
330
331 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
332 Data.getData().size() &&
333 "LSDA entry has crossed section boundary");
334
335 if (TTypeEnd) {
336 LSDAActionTable = LSDASectionData.slice(
337 N: ActionTableStart, M: TypeIndexTableStart -
338 MaxTypeIndex * TTypeEncodingSize -
339 ActionTableStart);
340 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
341 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
342 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
343 uint64_t TypeAddress =
344 *Data.getEncodedPointer(Offset: &TTEntry, Encoding: TTypeEncoding, AbsPosOffset: TTEntryAddress);
345 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
346 TypeAddress = 0;
347 if (TTypeEncoding & DW_EH_PE_indirect) {
348 LSDATypeAddressTable.emplace_back(Args&: TypeAddress);
349 if (TypeAddress) {
350 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(Address: TypeAddress);
351 assert(PointerOrErr && "failed to decode indirect address");
352 TypeAddress = *PointerOrErr;
353 }
354 }
355 LSDATypeTable.emplace_back(Args&: TypeAddress);
356 }
357 LSDATypeIndexTable =
358 LSDASectionData.slice(N: TypeIndexTableStart, M: MaxTypeIndexTableOffset);
359 }
360 return Error::success();
361}
362
363void BinaryFunction::updateEHRanges() {
364 if (getSize() == 0)
365 return;
366
367 assert(CurrentState == State::CFG_Finalized && "unexpected state");
368
369 // Build call sites table.
370 struct EHInfo {
371 const MCSymbol *LP; // landing pad
372 uint64_t Action;
373 };
374
375 // Sites to update.
376 CallSitesList Sites;
377
378 for (FunctionFragment &FF : getLayout().fragments()) {
379 // If previous call can throw, this is its exception handler.
380 EHInfo PreviousEH = {.LP: nullptr, .Action: 0};
381
382 // Marker for the beginning of exceptions range.
383 const MCSymbol *StartRange = nullptr;
384
385 for (BinaryBasicBlock *const BB : FF) {
386 for (MCInst &Instr : *BB) {
387 if (!BC.MIB->isCall(Inst: Instr))
388 continue;
389
390 // Instruction can throw an exception that should be handled.
391 const bool Throws = BC.MIB->isInvoke(Inst: Instr);
392
393 // Ignore the call if it's a continuation of a no-throw gap.
394 if (!Throws && !StartRange)
395 continue;
396
397 // Extract exception handling information from the instruction.
398 const MCSymbol *LP = nullptr;
399 uint64_t Action = 0;
400 if (const std::optional<MCPlus::MCLandingPad> EHInfo =
401 BC.MIB->getEHInfo(Inst: Instr))
402 std::tie(args&: LP, args&: Action) = *EHInfo;
403
404 // No action if the exception handler has not changed.
405 if (Throws && StartRange && PreviousEH.LP == LP &&
406 PreviousEH.Action == Action)
407 continue;
408
409 // Same symbol is used for the beginning and the end of the range.
410 MCSymbol *EHSymbol;
411 if (MCSymbol *InstrLabel = BC.MIB->getInstLabel(Inst: Instr)) {
412 EHSymbol = InstrLabel;
413 } else {
414 std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
415 EHSymbol = BC.MIB->getOrCreateInstLabel(Inst&: Instr, Name: "EH", Ctx: BC.Ctx.get());
416 }
417
418 // At this point we could be in one of the following states:
419 //
420 // I. Exception handler has changed and we need to close previous range
421 // and start a new one.
422 //
423 // II. Start a new exception range after the gap.
424 //
425 // III. Close current exception range and start a new gap.
426 const MCSymbol *EndRange;
427 if (StartRange) {
428 // I, III:
429 EndRange = EHSymbol;
430 } else {
431 // II:
432 StartRange = EHSymbol;
433 EndRange = nullptr;
434 }
435
436 // Close the previous range.
437 if (EndRange)
438 Sites.emplace_back(
439 Args: FF.getFragmentNum(),
440 Args: CallSite{.Start: StartRange, .End: EndRange, .LP: PreviousEH.LP, .Action: PreviousEH.Action});
441
442 if (Throws) {
443 // I, II:
444 StartRange = EHSymbol;
445 PreviousEH = EHInfo{.LP: LP, .Action: Action};
446 } else {
447 StartRange = nullptr;
448 }
449 }
450 }
451
452 // Check if we need to close the range.
453 if (StartRange) {
454 const MCSymbol *EndRange = getFunctionEndLabel(Fragment: FF.getFragmentNum());
455 Sites.emplace_back(
456 Args: FF.getFragmentNum(),
457 Args: CallSite{.Start: StartRange, .End: EndRange, .LP: PreviousEH.LP, .Action: PreviousEH.Action});
458 }
459 }
460
461 addCallSites(NewCallSites: Sites);
462}
463
// Mask selecting the two most significant bits of a CFI opcode byte.
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xC0;
465
466CFIReaderWriter::CFIReaderWriter(BinaryContext &BC,
467 const DWARFDebugFrame &EHFrame)
468 : BC(BC) {
469 // Prepare FDEs for fast lookup
470 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
471 const auto *CurFDE = dyn_cast<dwarf::FDE>(Val: &Entry);
472 // Skip CIEs.
473 if (!CurFDE)
474 continue;
475 // There could me multiple FDEs with the same initial address, and perhaps
476 // different sizes (address ranges). Use the first entry with non-zero size.
477 auto FDEI = FDEs.lower_bound(x: CurFDE->getInitialLocation());
478 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
479 if (CurFDE->getAddressRange()) {
480 if (FDEI->second->getAddressRange() == 0) {
481 FDEI->second = CurFDE;
482 } else if (opts::Verbosity > 0) {
483 BC.errs() << "BOLT-WARNING: different FDEs for function at 0x"
484 << Twine::utohexstr(Val: FDEI->first)
485 << " detected; sizes: " << FDEI->second->getAddressRange()
486 << " and " << CurFDE->getAddressRange() << '\n';
487 }
488 }
489 } else {
490 FDEs.emplace_hint(pos: FDEI, args: CurFDE->getInitialLocation(), args&: CurFDE);
491 }
492 }
493}
494
495bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
496 uint64_t Address = Function.getAddress();
497 auto I = FDEs.find(x: Address);
498 // Ignore zero-length FDE ranges.
499 if (I == FDEs.end() || !I->second->getAddressRange())
500 return true;
501
502 const FDE &CurFDE = *I->second;
503 std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
504 Function.setLSDAAddress(LSDA ? *LSDA : 0);
505
506 uint64_t Offset = Function.getFirstInstructionOffset();
507 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
508 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
509 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
510 Function.setPersonalityFunction(
511 *CurFDE.getLinkedCIE()->getPersonalityAddress());
512 Function.setPersonalityEncoding(
513 *CurFDE.getLinkedCIE()->getPersonalityEncoding());
514 }
515
516 auto decodeFrameInstruction = [this, &Function, &Offset, Address,
517 CodeAlignment, DataAlignment](
518 const CFIProgram::Instruction &Instr) {
519 uint8_t Opcode = Instr.Opcode;
520 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
521 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
522 switch (Instr.Opcode) {
523 case DW_CFA_nop:
524 break;
525 case DW_CFA_advance_loc4:
526 case DW_CFA_advance_loc2:
527 case DW_CFA_advance_loc1:
528 case DW_CFA_advance_loc:
529 // Advance our current address
530 Offset += CodeAlignment * int64_t(Instr.Ops[0]);
531 break;
532 case DW_CFA_offset_extended_sf:
533 Function.addCFIInstruction(
534 Offset,
535 Inst: MCCFIInstruction::createOffset(
536 L: nullptr, Register: Instr.Ops[0], Offset: DataAlignment * int64_t(Instr.Ops[1])));
537 break;
538 case DW_CFA_offset_extended:
539 case DW_CFA_offset:
540 Function.addCFIInstruction(
541 Offset, Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Instr.Ops[0],
542 Offset: DataAlignment * Instr.Ops[1]));
543 break;
544 case DW_CFA_restore_extended:
545 case DW_CFA_restore:
546 Function.addCFIInstruction(
547 Offset, Inst: MCCFIInstruction::createRestore(L: nullptr, Register: Instr.Ops[0]));
548 break;
549 case DW_CFA_set_loc:
550 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
551 assert(Instr.Ops[0] <= Address + Function.getSize() &&
552 "set_loc out of function bounds");
553 Offset = Instr.Ops[0] - Address;
554 break;
555
556 case DW_CFA_undefined:
557 Function.addCFIInstruction(
558 Offset, Inst: MCCFIInstruction::createUndefined(L: nullptr, Register: Instr.Ops[0]));
559 break;
560 case DW_CFA_same_value:
561 Function.addCFIInstruction(
562 Offset, Inst: MCCFIInstruction::createSameValue(L: nullptr, Register: Instr.Ops[0]));
563 break;
564 case DW_CFA_register:
565 Function.addCFIInstruction(
566 Offset, Inst: MCCFIInstruction::createRegister(L: nullptr, Register1: Instr.Ops[0],
567 Register2: Instr.Ops[1]));
568 break;
569 case DW_CFA_remember_state:
570 Function.addCFIInstruction(
571 Offset, Inst: MCCFIInstruction::createRememberState(L: nullptr));
572 break;
573 case DW_CFA_restore_state:
574 Function.addCFIInstruction(Offset,
575 Inst: MCCFIInstruction::createRestoreState(L: nullptr));
576 break;
577 case DW_CFA_def_cfa:
578 Function.addCFIInstruction(
579 Offset,
580 Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: Instr.Ops[0], Offset: Instr.Ops[1]));
581 break;
582 case DW_CFA_def_cfa_sf:
583 Function.addCFIInstruction(
584 Offset,
585 Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: Instr.Ops[0],
586 Offset: DataAlignment * int64_t(Instr.Ops[1])));
587 break;
588 case DW_CFA_def_cfa_register:
589 Function.addCFIInstruction(Offset, Inst: MCCFIInstruction::createDefCfaRegister(
590 L: nullptr, Register: Instr.Ops[0]));
591 break;
592 case DW_CFA_def_cfa_offset:
593 Function.addCFIInstruction(
594 Offset, Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: Instr.Ops[0]));
595 break;
596 case DW_CFA_def_cfa_offset_sf:
597 Function.addCFIInstruction(
598 Offset, Inst: MCCFIInstruction::cfiDefCfaOffset(
599 L: nullptr, Offset: DataAlignment * int64_t(Instr.Ops[0])));
600 break;
601 case DW_CFA_GNU_args_size:
602 Function.addCFIInstruction(
603 Offset, Inst: MCCFIInstruction::createGnuArgsSize(L: nullptr, Size: Instr.Ops[0]));
604 Function.setUsesGnuArgsSize();
605 break;
606 case DW_CFA_val_offset_sf:
607 case DW_CFA_val_offset:
608 if (opts::Verbosity >= 1) {
609 BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
610 }
611 return false;
612 case DW_CFA_def_cfa_expression:
613 case DW_CFA_val_expression:
614 case DW_CFA_expression: {
615 StringRef ExprBytes = Instr.Expression->getData();
616 std::string Str;
617 raw_string_ostream OS(Str);
618 // Manually encode this instruction using CFI escape
619 OS << Opcode;
620 if (Opcode != DW_CFA_def_cfa_expression)
621 encodeULEB128(Value: Instr.Ops[0], OS);
622 encodeULEB128(Value: ExprBytes.size(), OS);
623 OS << ExprBytes;
624 Function.addCFIInstruction(
625 Offset, Inst: MCCFIInstruction::createEscape(L: nullptr, Vals: OS.str()));
626 break;
627 }
628 case DW_CFA_MIPS_advance_loc8:
629 if (opts::Verbosity >= 1)
630 BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
631 return false;
632 case DW_CFA_GNU_window_save:
633 // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
634 // id but mean different things. The latter is used in AArch64.
635 if (Function.getBinaryContext().isAArch64()) {
636 Function.addCFIInstruction(
637 Offset, Inst: MCCFIInstruction::createNegateRAState(L: nullptr));
638 break;
639 }
640 if (opts::Verbosity >= 1)
641 BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
642 return false;
643 case DW_CFA_lo_user:
644 case DW_CFA_hi_user:
645 if (opts::Verbosity >= 1)
646 BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
647 return false;
648 default:
649 if (opts::Verbosity >= 1)
650 BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
651 << Instr.Opcode << '\n';
652 return false;
653 }
654
655 return true;
656 };
657
658 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
659 if (!decodeFrameInstruction(Instr))
660 return false;
661
662 for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
663 if (!decodeFrameInstruction(Instr))
664 return false;
665
666 return true;
667}
668
669std::vector<char> CFIReaderWriter::generateEHFrameHeader(
670 const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
671 uint64_t EHFrameHeaderAddress,
672 std::vector<uint64_t> &FailedAddresses) const {
673 // Common PC -> FDE map to be written into .eh_frame_hdr.
674 std::map<uint64_t, uint64_t> PCToFDE;
675
676 // Presort array for binary search.
677 llvm::sort(C&: FailedAddresses);
678
679 // Initialize PCToFDE using NewEHFrame.
680 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
681 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(Val: &Entry);
682 if (FDE == nullptr)
683 continue;
684 const uint64_t FuncAddress = FDE->getInitialLocation();
685 const uint64_t FDEAddress =
686 NewEHFrame.getEHFrameAddress() + FDE->getOffset();
687
688 // Ignore unused FDEs.
689 if (FuncAddress == 0)
690 continue;
691
692 // Add the address to the map unless we failed to write it.
693 if (!std::binary_search(first: FailedAddresses.begin(), last: FailedAddresses.end(),
694 val: FuncAddress)) {
695 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
696 << Twine::utohexstr(FuncAddress) << " is at 0x"
697 << Twine::utohexstr(FDEAddress) << '\n');
698 PCToFDE[FuncAddress] = FDEAddress;
699 }
700 };
701
702 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
703 << llvm::size(NewEHFrame.entries()) << " entries\n");
704
705 // Add entries from the original .eh_frame corresponding to the functions
706 // that we did not update.
707 for (const dwarf::FrameEntry &Entry : OldEHFrame) {
708 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(Val: &Entry);
709 if (FDE == nullptr)
710 continue;
711 const uint64_t FuncAddress = FDE->getInitialLocation();
712 const uint64_t FDEAddress =
713 OldEHFrame.getEHFrameAddress() + FDE->getOffset();
714
715 // Add the address if we failed to write it.
716 if (PCToFDE.count(x: FuncAddress) == 0) {
717 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
718 << Twine::utohexstr(FuncAddress) << " is at 0x"
719 << Twine::utohexstr(FDEAddress) << '\n');
720 PCToFDE[FuncAddress] = FDEAddress;
721 }
722 };
723
724 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
725 << llvm::size(OldEHFrame.entries()) << " entries\n");
726
727 // Generate a new .eh_frame_hdr based on the new map.
728
729 // Header plus table of entries of size 8 bytes.
730 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
731
732 // Version is 1.
733 EHFrameHeader[0] = 1;
734 // Encoding of the eh_frame pointer.
735 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
736 // Encoding of the count field to follow.
737 EHFrameHeader[2] = DW_EH_PE_udata4;
738 // Encoding of the table entries - 4-byte offset from the start of the header.
739 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
740
741 // Address of eh_frame. Use the new one.
742 support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
743 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
744
745 // Number of entries in the table (FDE count).
746 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
747
748 // Write the table at offset 12.
749 char *Ptr = EHFrameHeader.data();
750 uint32_t Offset = 12;
751 for (const auto &PCI : PCToFDE) {
752 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
753 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
754 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
755 Offset += 4;
756 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
757 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
758 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
759 Offset += 4;
760 }
761
762 return EHFrameHeader;
763}
764
765Error EHFrameParser::parseCIE(uint64_t StartOffset) {
766 uint8_t Version = Data.getU8(offset_ptr: &Offset);
767 const char *Augmentation = Data.getCStr(OffsetPtr: &Offset);
768 StringRef AugmentationString(Augmentation ? Augmentation : "");
769 uint8_t AddressSize =
770 Version < 4 ? Data.getAddressSize() : Data.getU8(offset_ptr: &Offset);
771 Data.setAddressSize(AddressSize);
772 // Skip segment descriptor size
773 if (Version >= 4)
774 Offset += 1;
775 // Skip code alignment factor
776 Data.getULEB128(offset_ptr: &Offset);
777 // Skip data alignment
778 Data.getSLEB128(OffsetPtr: &Offset);
779 // Skip return address register
780 if (Version == 1)
781 Offset += 1;
782 else
783 Data.getULEB128(offset_ptr: &Offset);
784
785 uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
786 uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
787 // Walk the augmentation string to get all the augmentation data.
788 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
789 switch (AugmentationString[i]) {
790 default:
791 return createStringError(
792 EC: errc::invalid_argument,
793 Fmt: "unknown augmentation character in entry at 0x%" PRIx64, Vals: StartOffset);
794 case 'L':
795 LSDAPointerEncoding = Data.getU8(offset_ptr: &Offset);
796 break;
797 case 'P': {
798 uint32_t PersonalityEncoding = Data.getU8(offset_ptr: &Offset);
799 std::optional<uint64_t> Personality =
800 Data.getEncodedPointer(Offset: &Offset, Encoding: PersonalityEncoding,
801 AbsPosOffset: EHFrameAddress ? EHFrameAddress + Offset : 0);
802 // Patch personality address
803 if (Personality)
804 PatcherCallback(*Personality, Offset, PersonalityEncoding);
805 break;
806 }
807 case 'R':
808 FDEPointerEncoding = Data.getU8(offset_ptr: &Offset);
809 break;
810 case 'z':
811 if (i)
812 return createStringError(
813 EC: errc::invalid_argument,
814 Fmt: "'z' must be the first character at 0x%" PRIx64, Vals: StartOffset);
815 // Skip augmentation length
816 Data.getULEB128(offset_ptr: &Offset);
817 break;
818 case 'S':
819 case 'B':
820 break;
821 }
822 }
823 Entries.emplace_back(args: std::make_unique<CIEInfo>(
824 args&: FDEPointerEncoding, args&: LSDAPointerEncoding, args&: AugmentationString));
825 CIEs[StartOffset] = &*Entries.back();
826 return Error::success();
827}
828
829Error EHFrameParser::parseFDE(uint64_t CIEPointer,
830 uint64_t StartStructureOffset) {
831 std::optional<uint64_t> LSDAAddress;
832 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
833
834 // The address size is encoded in the CIE we reference.
835 if (!Cie)
836 return createStringError(EC: errc::invalid_argument,
837 Fmt: "parsing FDE data at 0x%" PRIx64
838 " failed due to missing CIE",
839 Vals: StartStructureOffset);
840 // Patch initial location
841 if (auto Val = Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->FDEPtrEncoding,
842 AbsPosOffset: EHFrameAddress + Offset)) {
843 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
844 }
845 // Skip address range
846 Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->FDEPtrEncoding, AbsPosOffset: 0);
847
848 // Process augmentation data for this FDE.
849 StringRef AugmentationString = Cie->AugmentationString;
850 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
851 // Skip augmentation length
852 Data.getULEB128(offset_ptr: &Offset);
853 LSDAAddress =
854 Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->LSDAPtrEncoding,
855 AbsPosOffset: EHFrameAddress ? Offset + EHFrameAddress : 0);
856 // Patch LSDA address
857 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
858 }
859 return Error::success();
860}
861
862Error EHFrameParser::parse() {
863 while (Data.isValidOffset(offset: Offset)) {
864 const uint64_t StartOffset = Offset;
865
866 uint64_t Length;
867 DwarfFormat Format;
868 std::tie(args&: Length, args&: Format) = Data.getInitialLength(Off: &Offset);
869
870 // If the Length is 0, then this CIE is a terminator
871 if (Length == 0)
872 break;
873
874 const uint64_t StartStructureOffset = Offset;
875 const uint64_t EndStructureOffset = Offset + Length;
876
877 Error Err = Error::success();
878 const uint64_t Id = Data.getRelocatedValue(Size: 4, Off: &Offset,
879 /*SectionIndex=*/nullptr, Err: &Err);
880 if (Err)
881 return Err;
882
883 if (!Id) {
884 if (Error Err = parseCIE(StartOffset))
885 return Err;
886 } else {
887 if (Error Err = parseFDE(CIEPointer: Id, StartStructureOffset))
888 return Err;
889 }
890 Offset = EndStructureOffset;
891 }
892
893 return Error::success();
894}
895
896Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
897 PatcherCallbackTy PatcherCallback) {
898 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
899 return Parser.parse();
900}
901
902} // namespace bolt
903} // namespace llvm
904

source code of bolt/lib/Core/Exceptions.cpp