1//===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements functions for handling C++ exception meta data.
10//
11// Some of the code is taken from examples/ExceptionDemo
12//
13//===----------------------------------------------------------------------===//
14
15#include "bolt/Core/Exceptions.h"
16#include "bolt/Core/BinaryFunction.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/Twine.h"
19#include "llvm/BinaryFormat/Dwarf.h"
20#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21#include "llvm/Support/Casting.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/Errc.h"
25#include "llvm/Support/LEB128.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28#include <map>
29
30#undef DEBUG_TYPE
31#define DEBUG_TYPE "bolt-exceptions"
32
33using namespace llvm::dwarf;
34
35namespace opts {
36
37extern llvm::cl::OptionCategory BoltCategory;
38
39extern llvm::cl::opt<unsigned> Verbosity;
40
41static llvm::cl::opt<bool>
42 PrintExceptions("print-exceptions",
43 llvm::cl::desc("print exception handling data"),
44 llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
45
46} // namespace opts
47
48namespace llvm {
49namespace bolt {
50
51// Read and dump the .gcc_exception_table section entry.
52//
53// .gcc_except_table section contains a set of Language-Specific Data Areas -
54// a fancy name for exception handling tables. There's one LSDA entry per
55// function. However, we can't actually tell which function LSDA refers to
56// unless we parse .eh_frame entry that refers to the LSDA.
57// Then inside LSDA most addresses are encoded relative to the function start,
58// so we need the function context in order to get to real addresses.
59//
60// The best visual representation of the tables comprising LSDA and
61// relationships between them is illustrated at:
62// https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63// Keep in mind that GCC implementation deviates slightly from that document.
64//
65// To summarize, there are 4 tables in LSDA: call site table, actions table,
66// types table, and types index table (for indirection). The main table contains
67// call site entries. Each call site includes a PC range that can throw an
68// exception, a handler (landing pad), and a reference to an entry in the action
69// table. The handler and/or action could be 0. The action entry is a head
70// of a list of actions associated with a call site. The action table contains
71// all such lists (it could be optimized to share list tails). Each action could
72// be either to catch an exception of a given type, to perform a cleanup, or to
73// propagate the exception after filtering it out (e.g. to make sure function
74// exception specification is not violated). Catch action contains a reference
75// to an entry in the type table, and filter action refers to an entry in the
76// type index table to encode a set of types to filter.
77//
78// Call site table follows LSDA header. Action table immediately follows the
79// call site table.
80//
81// Both types table and type index table start at the same location, but they
82// grow in opposite directions (types go up, indices go down). The beginning of
83// these tables is encoded in LSDA header. Sizes for both of the tables are not
84// included anywhere.
85//
86// We have to parse all of the tables to determine their sizes. Then we have
87// to parse the call site table and associate discovered information with
88// actual call instructions and landing pad blocks.
89//
90// For the purpose of rewriting exception handling tables, we can reuse action,
91// and type index tables in their original binary format.
92//
93// Type table could be encoded using position-independent references, and thus
94// may require relocation.
95//
96// Ideally we should be able to re-write LSDA in-place, without the need to
97// allocate a new space for it. Sadly there's no guarantee that the new call
98// site table will be the same size as GCC uses uleb encodings for PC offsets.
99//
100// Note: some functions have LSDA entries with 0 call site entries.
101Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102 uint64_t LSDASectionAddress) {
103 assert(CurrentState == State::Disassembled && "unexpected function state");
104
105 if (!getLSDAAddress())
106 return Error::success();
107
108 DWARFDataExtractor Data(
109 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110 LSDASectionData.size()),
111 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
112 uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
113 assert(Data.isValidOffset(Offset) && "wrong LSDA address");
114
115 const uint8_t LPStartEncoding = Data.getU8(offset_ptr: &Offset);
116 uint64_t LPStart = Address;
117 if (LPStartEncoding != dwarf::DW_EH_PE_omit) {
118 std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
119 Offset: &Offset, Encoding: LPStartEncoding, PCRelOffset: Offset + LSDASectionAddress);
120 if (!MaybeLPStart) {
121 BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: "
122 << (unsigned)LPStartEncoding << '\n';
123 return createFatalBOLTError(S: "");
124 }
125 LPStart = *MaybeLPStart;
126 }
127
128 const uint8_t TTypeEncoding = Data.getU8(offset_ptr: &Offset);
129 LSDATypeEncoding = TTypeEncoding;
130 size_t TTypeEncodingSize = 0;
131 uintptr_t TTypeEnd = 0;
132 if (TTypeEncoding != DW_EH_PE_omit) {
133 TTypeEnd = Data.getULEB128(offset_ptr: &Offset);
134 TTypeEncodingSize = BC.getDWARFEncodingSize(Encoding: TTypeEncoding);
135 }
136
137 if (opts::PrintExceptions) {
138 BC.outs() << "[LSDA at 0x" << Twine::utohexstr(Val: getLSDAAddress())
139 << " for function " << *this << "]:\n";
140 BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(Val: LPStartEncoding)
141 << '\n';
142 BC.outs() << "LPStart = 0x" << Twine::utohexstr(Val: LPStart) << '\n';
143 BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(Val: TTypeEncoding)
144 << '\n';
145 BC.outs() << "TType End = " << TTypeEnd << '\n';
146 }
147
148 // Table to store list of indices in type table. Entries are uleb128 values.
149 const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
150
151 // Offset past the last decoded index.
152 uint64_t MaxTypeIndexTableOffset = 0;
153
154 // Max positive index used in type table.
155 unsigned MaxTypeIndex = 0;
156
157 // The actual type info table starts at the same location, but grows in
158 // opposite direction. TTypeEncoding is used to encode stored values.
159 const uint64_t TypeTableStart = Offset + TTypeEnd;
160
161 uint8_t CallSiteEncoding = Data.getU8(offset_ptr: &Offset);
162 uint32_t CallSiteTableLength = Data.getULEB128(offset_ptr: &Offset);
163 uint64_t CallSiteTableStart = Offset;
164 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
165 uint64_t CallSitePtr = CallSiteTableStart;
166 uint64_t ActionTableStart = CallSiteTableEnd;
167
168 if (opts::PrintExceptions) {
169 BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
170 BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n';
171 BC.outs() << '\n';
172 }
173
174 this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
175 const uint64_t RangeBase = getAddress();
176 while (CallSitePtr < CallSiteTableEnd) {
177 uint64_t Start = *Data.getEncodedPointer(Offset: &CallSitePtr, Encoding: CallSiteEncoding,
178 PCRelOffset: CallSitePtr + LSDASectionAddress);
179 uint64_t Length = *Data.getEncodedPointer(Offset: &CallSitePtr, Encoding: CallSiteEncoding,
180 PCRelOffset: CallSitePtr + LSDASectionAddress);
181 uint64_t LandingPad = *Data.getEncodedPointer(
182 Offset: &CallSitePtr, Encoding: CallSiteEncoding, PCRelOffset: CallSitePtr + LSDASectionAddress);
183 uint64_t ActionEntry = Data.getULEB128(offset_ptr: &CallSitePtr);
184 if (LandingPad)
185 LandingPad += LPStart;
186
187 if (opts::PrintExceptions) {
188 BC.outs() << "Call Site: [0x" << Twine::utohexstr(Val: RangeBase + Start)
189 << ", 0x" << Twine::utohexstr(Val: RangeBase + Start + Length)
190 << "); landing pad: 0x" << Twine::utohexstr(Val: LandingPad)
191 << "; action entry: 0x" << Twine::utohexstr(Val: ActionEntry)
192 << "\n";
193 BC.outs() << " current offset is " << (CallSitePtr - CallSiteTableStart)
194 << '\n';
195 }
196
197 // Create a handler entry if necessary.
198 MCSymbol *LPSymbol = nullptr;
199 if (LandingPad) {
200 // Verify if landing pad code is located outside current function
201 // Support landing pad to builtin_unreachable
202 if (LandingPad < Address || LandingPad > Address + getSize()) {
203 BinaryFunction *Fragment =
204 BC.getBinaryFunctionContainingAddress(Address: LandingPad);
205 assert(Fragment != nullptr &&
206 "BOLT-ERROR: cannot find landing pad fragment");
207 BC.addInterproceduralReference(Function: this, Address: Fragment->getAddress());
208 BC.processInterproceduralReferences();
209 assert(BC.areRelatedFragments(this, Fragment) &&
210 "BOLT-ERROR: cannot have landing pads in different functions");
211 setHasIndirectTargetToSplitFragment(true);
212 BC.addFragmentsToSkip(Function: this);
213 return Error::success();
214 }
215
216 const uint64_t LPOffset = LandingPad - getAddress();
217 if (!getInstructionAtOffset(Offset: LPOffset)) {
218 if (opts::Verbosity >= 1)
219 BC.errs() << "BOLT-WARNING: landing pad "
220 << Twine::utohexstr(Val: LPOffset)
221 << " not pointing to an instruction in function " << *this
222 << " - ignoring.\n";
223 } else {
224 auto Label = Labels.find(x: LPOffset);
225 if (Label != Labels.end()) {
226 LPSymbol = Label->second;
227 } else {
228 LPSymbol = BC.Ctx->createNamedTempSymbol(Name: "LP");
229 Labels[LPOffset] = LPSymbol;
230 }
231 }
232 }
233
234 // Mark all call instructions in the range.
235 auto II = Instructions.find(x: Start);
236 auto IE = Instructions.end();
237 assert(II != IE && "exception range not pointing to an instruction");
238 do {
239 MCInst &Instruction = II->second;
240 if (BC.MIB->isCall(Inst: Instruction) &&
241 !BC.MIB->getConditionalTailCall(Inst: Instruction)) {
242 assert(!BC.MIB->isInvoke(Instruction) &&
243 "overlapping exception ranges detected");
244 // Add extra operands to a call instruction making it an invoke from
245 // now on.
246 BC.MIB->addEHInfo(Inst&: Instruction,
247 LP: MCPlus::MCLandingPad(LPSymbol, ActionEntry));
248 }
249 ++II;
250 } while (II != IE && II->first < Start + Length);
251
252 if (ActionEntry != 0) {
253 auto printType = [&](int Index, raw_ostream &OS) {
254 assert(Index > 0 && "only positive indices are valid");
255 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
256 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
257 uint64_t TypeAddress =
258 *Data.getEncodedPointer(Offset: &TTEntry, Encoding: TTypeEncoding, PCRelOffset: TTEntryAddress);
259 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
260 TypeAddress = 0;
261 if (TypeAddress == 0) {
262 OS << "<all>";
263 return;
264 }
265 if (TTypeEncoding & DW_EH_PE_indirect) {
266 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(Address: TypeAddress);
267 assert(PointerOrErr && "failed to decode indirect address");
268 TypeAddress = *PointerOrErr;
269 }
270 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(Address: TypeAddress))
271 OS << TypeSymBD->getName();
272 else
273 OS << "0x" << Twine::utohexstr(Val: TypeAddress);
274 };
275 if (opts::PrintExceptions)
276 BC.outs() << " actions: ";
277 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
278 int64_t ActionType;
279 int64_t ActionNext;
280 const char *Sep = "";
281 do {
282 ActionType = Data.getSLEB128(OffsetPtr: &ActionPtr);
283 const uint32_t Self = ActionPtr;
284 ActionNext = Data.getSLEB128(OffsetPtr: &ActionPtr);
285 if (opts::PrintExceptions)
286 BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
287 if (ActionType == 0) {
288 if (opts::PrintExceptions)
289 BC.outs() << "cleanup";
290 } else if (ActionType > 0) {
291 // It's an index into a type table.
292 MaxTypeIndex =
293 std::max(a: MaxTypeIndex, b: static_cast<unsigned>(ActionType));
294 if (opts::PrintExceptions) {
295 BC.outs() << "catch type ";
296 printType(ActionType, BC.outs());
297 }
298 } else { // ActionType < 0
299 if (opts::PrintExceptions)
300 BC.outs() << "filter exception types ";
301 const char *TSep = "";
302 // ActionType is a negative *byte* offset into *uleb128-encoded* table
303 // of indices with base 1.
304 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
305 // encoded using uleb128 thus we cannot directly dereference them.
306 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
307 while (uint64_t Index = Data.getULEB128(offset_ptr: &TypeIndexTablePtr)) {
308 MaxTypeIndex = std::max(a: MaxTypeIndex, b: static_cast<unsigned>(Index));
309 if (opts::PrintExceptions) {
310 BC.outs() << TSep;
311 printType(Index, BC.outs());
312 TSep = ", ";
313 }
314 }
315 MaxTypeIndexTableOffset = std::max(
316 a: MaxTypeIndexTableOffset, b: TypeIndexTablePtr - TypeIndexTableStart);
317 }
318
319 Sep = "; ";
320
321 ActionPtr = Self + ActionNext;
322 } while (ActionNext);
323 if (opts::PrintExceptions)
324 BC.outs() << '\n';
325 }
326 }
327 if (opts::PrintExceptions)
328 BC.outs() << '\n';
329
330 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
331 Data.getData().size() &&
332 "LSDA entry has crossed section boundary");
333
334 if (TTypeEnd) {
335 LSDAActionTable = LSDASectionData.slice(
336 N: ActionTableStart, M: TypeIndexTableStart -
337 MaxTypeIndex * TTypeEncodingSize -
338 ActionTableStart);
339 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
340 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
341 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
342 uint64_t TypeAddress =
343 *Data.getEncodedPointer(Offset: &TTEntry, Encoding: TTypeEncoding, PCRelOffset: TTEntryAddress);
344 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
345 TypeAddress = 0;
346 if (TTypeEncoding & DW_EH_PE_indirect) {
347 LSDATypeAddressTable.emplace_back(Args&: TypeAddress);
348 if (TypeAddress) {
349 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(Address: TypeAddress);
350 assert(PointerOrErr && "failed to decode indirect address");
351 TypeAddress = *PointerOrErr;
352 }
353 }
354 LSDATypeTable.emplace_back(Args&: TypeAddress);
355 }
356 LSDATypeIndexTable =
357 LSDASectionData.slice(N: TypeIndexTableStart, M: MaxTypeIndexTableOffset);
358 }
359 return Error::success();
360}
361
362void BinaryFunction::updateEHRanges() {
363 if (getSize() == 0)
364 return;
365
366 assert(CurrentState == State::CFG_Finalized && "unexpected state");
367
368 // Build call sites table.
369 struct EHInfo {
370 const MCSymbol *LP; // landing pad
371 uint64_t Action;
372 };
373
374 // Sites to update.
375 CallSitesList Sites;
376
377 for (FunctionFragment &FF : getLayout().fragments()) {
378 // If previous call can throw, this is its exception handler.
379 EHInfo PreviousEH = {.LP: nullptr, .Action: 0};
380
381 // Marker for the beginning of exceptions range.
382 const MCSymbol *StartRange = nullptr;
383
384 for (BinaryBasicBlock *const BB : FF) {
385 for (MCInst &Instr : *BB) {
386 if (!BC.MIB->isCall(Inst: Instr))
387 continue;
388
389 // Instruction can throw an exception that should be handled.
390 const bool Throws = BC.MIB->isInvoke(Inst: Instr);
391
392 // Ignore the call if it's a continuation of a no-throw gap.
393 if (!Throws && !StartRange)
394 continue;
395
396 // Extract exception handling information from the instruction.
397 const MCSymbol *LP = nullptr;
398 uint64_t Action = 0;
399 if (const std::optional<MCPlus::MCLandingPad> EHInfo =
400 BC.MIB->getEHInfo(Inst: Instr))
401 std::tie(args&: LP, args&: Action) = *EHInfo;
402
403 // No action if the exception handler has not changed.
404 if (Throws && StartRange && PreviousEH.LP == LP &&
405 PreviousEH.Action == Action)
406 continue;
407
408 // Same symbol is used for the beginning and the end of the range.
409 MCSymbol *EHSymbol;
410 if (MCSymbol *InstrLabel = BC.MIB->getInstLabel(Inst: Instr)) {
411 EHSymbol = InstrLabel;
412 } else {
413 std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
414 EHSymbol = BC.MIB->getOrCreateInstLabel(Inst&: Instr, Name: "EH", Ctx: BC.Ctx.get());
415 }
416
417 // At this point we could be in one of the following states:
418 //
419 // I. Exception handler has changed and we need to close previous range
420 // and start a new one.
421 //
422 // II. Start a new exception range after the gap.
423 //
424 // III. Close current exception range and start a new gap.
425 const MCSymbol *EndRange;
426 if (StartRange) {
427 // I, III:
428 EndRange = EHSymbol;
429 } else {
430 // II:
431 StartRange = EHSymbol;
432 EndRange = nullptr;
433 }
434
435 // Close the previous range.
436 if (EndRange)
437 Sites.emplace_back(
438 Args: FF.getFragmentNum(),
439 Args: CallSite{.Start: StartRange, .End: EndRange, .LP: PreviousEH.LP, .Action: PreviousEH.Action});
440
441 if (Throws) {
442 // I, II:
443 StartRange = EHSymbol;
444 PreviousEH = EHInfo{.LP: LP, .Action: Action};
445 } else {
446 StartRange = nullptr;
447 }
448 }
449 }
450
451 // Check if we need to close the range.
452 if (StartRange) {
453 const MCSymbol *EndRange = getFunctionEndLabel(Fragment: FF.getFragmentNum());
454 Sites.emplace_back(
455 Args: FF.getFragmentNum(),
456 Args: CallSite{.Start: StartRange, .End: EndRange, .LP: PreviousEH.LP, .Action: PreviousEH.Action});
457 }
458 }
459
460 addCallSites(NewCallSites: Sites);
461}
462
// Selects the two most significant bits of a CFI opcode byte.
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xC0;
464
465CFIReaderWriter::CFIReaderWriter(BinaryContext &BC,
466 const DWARFDebugFrame &EHFrame)
467 : BC(BC) {
468 // Prepare FDEs for fast lookup
469 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
470 const auto *CurFDE = dyn_cast<dwarf::FDE>(Val: &Entry);
471 // Skip CIEs.
472 if (!CurFDE)
473 continue;
474 // There could me multiple FDEs with the same initial address, and perhaps
475 // different sizes (address ranges). Use the first entry with non-zero size.
476 auto FDEI = FDEs.lower_bound(x: CurFDE->getInitialLocation());
477 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
478 if (CurFDE->getAddressRange()) {
479 if (FDEI->second->getAddressRange() == 0) {
480 FDEI->second = CurFDE;
481 } else if (opts::Verbosity > 0) {
482 BC.errs() << "BOLT-WARNING: different FDEs for function at 0x"
483 << Twine::utohexstr(Val: FDEI->first)
484 << " detected; sizes: " << FDEI->second->getAddressRange()
485 << " and " << CurFDE->getAddressRange() << '\n';
486 }
487 }
488 } else {
489 FDEs.emplace_hint(pos: FDEI, args: CurFDE->getInitialLocation(), args&: CurFDE);
490 }
491 }
492}
493
494bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
495 uint64_t Address = Function.getAddress();
496 auto I = FDEs.find(x: Address);
497 // Ignore zero-length FDE ranges.
498 if (I == FDEs.end() || !I->second->getAddressRange())
499 return true;
500
501 const FDE &CurFDE = *I->second;
502 std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
503 Function.setLSDAAddress(LSDA ? *LSDA : 0);
504
505 uint64_t Offset = Function.getFirstInstructionOffset();
506 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
507 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
508 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
509 Function.setPersonalityFunction(
510 *CurFDE.getLinkedCIE()->getPersonalityAddress());
511 Function.setPersonalityEncoding(
512 *CurFDE.getLinkedCIE()->getPersonalityEncoding());
513 }
514
515 auto decodeFrameInstruction = [this, &Function, &Offset, Address,
516 CodeAlignment, DataAlignment](
517 const CFIProgram::Instruction &Instr) {
518 uint8_t Opcode = Instr.Opcode;
519 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
520 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
521 switch (Instr.Opcode) {
522 case DW_CFA_nop:
523 break;
524 case DW_CFA_advance_loc4:
525 case DW_CFA_advance_loc2:
526 case DW_CFA_advance_loc1:
527 case DW_CFA_advance_loc:
528 // Advance our current address
529 Offset += CodeAlignment * int64_t(Instr.Ops[0]);
530 break;
531 case DW_CFA_offset_extended_sf:
532 Function.addCFIInstruction(
533 Offset,
534 Inst: MCCFIInstruction::createOffset(
535 L: nullptr, Register: Instr.Ops[0], Offset: DataAlignment * int64_t(Instr.Ops[1])));
536 break;
537 case DW_CFA_offset_extended:
538 case DW_CFA_offset:
539 Function.addCFIInstruction(
540 Offset, Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Instr.Ops[0],
541 Offset: DataAlignment * Instr.Ops[1]));
542 break;
543 case DW_CFA_restore_extended:
544 case DW_CFA_restore:
545 Function.addCFIInstruction(
546 Offset, Inst: MCCFIInstruction::createRestore(L: nullptr, Register: Instr.Ops[0]));
547 break;
548 case DW_CFA_set_loc:
549 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
550 assert(Instr.Ops[0] <= Address + Function.getSize() &&
551 "set_loc out of function bounds");
552 Offset = Instr.Ops[0] - Address;
553 break;
554
555 case DW_CFA_undefined:
556 Function.addCFIInstruction(
557 Offset, Inst: MCCFIInstruction::createUndefined(L: nullptr, Register: Instr.Ops[0]));
558 break;
559 case DW_CFA_same_value:
560 Function.addCFIInstruction(
561 Offset, Inst: MCCFIInstruction::createSameValue(L: nullptr, Register: Instr.Ops[0]));
562 break;
563 case DW_CFA_register:
564 Function.addCFIInstruction(
565 Offset, Inst: MCCFIInstruction::createRegister(L: nullptr, Register1: Instr.Ops[0],
566 Register2: Instr.Ops[1]));
567 break;
568 case DW_CFA_remember_state:
569 Function.addCFIInstruction(
570 Offset, Inst: MCCFIInstruction::createRememberState(L: nullptr));
571 break;
572 case DW_CFA_restore_state:
573 Function.addCFIInstruction(Offset,
574 Inst: MCCFIInstruction::createRestoreState(L: nullptr));
575 break;
576 case DW_CFA_def_cfa:
577 Function.addCFIInstruction(
578 Offset,
579 Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: Instr.Ops[0], Offset: Instr.Ops[1]));
580 break;
581 case DW_CFA_def_cfa_sf:
582 Function.addCFIInstruction(
583 Offset,
584 Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: Instr.Ops[0],
585 Offset: DataAlignment * int64_t(Instr.Ops[1])));
586 break;
587 case DW_CFA_def_cfa_register:
588 Function.addCFIInstruction(Offset, Inst: MCCFIInstruction::createDefCfaRegister(
589 L: nullptr, Register: Instr.Ops[0]));
590 break;
591 case DW_CFA_def_cfa_offset:
592 Function.addCFIInstruction(
593 Offset, Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: Instr.Ops[0]));
594 break;
595 case DW_CFA_def_cfa_offset_sf:
596 Function.addCFIInstruction(
597 Offset, Inst: MCCFIInstruction::cfiDefCfaOffset(
598 L: nullptr, Offset: DataAlignment * int64_t(Instr.Ops[0])));
599 break;
600 case DW_CFA_GNU_args_size:
601 Function.addCFIInstruction(
602 Offset, Inst: MCCFIInstruction::createGnuArgsSize(L: nullptr, Size: Instr.Ops[0]));
603 Function.setUsesGnuArgsSize();
604 break;
605 case DW_CFA_val_offset_sf:
606 case DW_CFA_val_offset:
607 if (opts::Verbosity >= 1) {
608 BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
609 }
610 return false;
611 case DW_CFA_def_cfa_expression:
612 case DW_CFA_val_expression:
613 case DW_CFA_expression: {
614 StringRef ExprBytes = Instr.Expression->getData();
615 std::string Str;
616 raw_string_ostream OS(Str);
617 // Manually encode this instruction using CFI escape
618 OS << Opcode;
619 if (Opcode != DW_CFA_def_cfa_expression)
620 encodeULEB128(Value: Instr.Ops[0], OS);
621 encodeULEB128(Value: ExprBytes.size(), OS);
622 OS << ExprBytes;
623 Function.addCFIInstruction(
624 Offset, Inst: MCCFIInstruction::createEscape(L: nullptr, Vals: OS.str()));
625 break;
626 }
627 case DW_CFA_MIPS_advance_loc8:
628 if (opts::Verbosity >= 1)
629 BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
630 return false;
631 case DW_CFA_GNU_window_save:
632 // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
633 // id but mean different things. The latter is used in AArch64.
634 if (Function.getBinaryContext().isAArch64()) {
635 Function.addCFIInstruction(
636 Offset, Inst: MCCFIInstruction::createNegateRAState(L: nullptr));
637 break;
638 }
639 if (opts::Verbosity >= 1)
640 BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
641 return false;
642 case DW_CFA_lo_user:
643 case DW_CFA_hi_user:
644 if (opts::Verbosity >= 1)
645 BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
646 return false;
647 default:
648 if (opts::Verbosity >= 1)
649 BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
650 << Instr.Opcode << '\n';
651 return false;
652 }
653
654 return true;
655 };
656
657 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
658 if (!decodeFrameInstruction(Instr))
659 return false;
660
661 for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
662 if (!decodeFrameInstruction(Instr))
663 return false;
664
665 return true;
666}
667
668std::vector<char>
669CFIReaderWriter::generateEHFrameHeader(const DWARFDebugFrame &OldEHFrame,
670 const DWARFDebugFrame &NewEHFrame,
671 uint64_t EHFrameHeaderAddress) const {
672 // Common PC -> FDE map to be written into .eh_frame_hdr.
673 std::map<uint64_t, uint64_t> PCToFDE;
674
675 // Initialize PCToFDE using NewEHFrame.
676 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
677 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(Val: &Entry);
678 if (FDE == nullptr)
679 continue;
680 const uint64_t FuncAddress = FDE->getInitialLocation();
681 const uint64_t FDEAddress =
682 NewEHFrame.getEHFrameAddress() + FDE->getOffset();
683
684 // Ignore unused FDEs.
685 if (FuncAddress == 0)
686 continue;
687
688 // Add the address to the map unless we failed to write it.
689 PCToFDE[FuncAddress] = FDEAddress;
690 };
691
692 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
693 << llvm::size(NewEHFrame.entries()) << " entries\n");
694
695 // Add entries from the original .eh_frame corresponding to the functions
696 // that we did not update.
697 for (const dwarf::FrameEntry &Entry : OldEHFrame) {
698 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(Val: &Entry);
699 if (FDE == nullptr)
700 continue;
701 const uint64_t FuncAddress = FDE->getInitialLocation();
702 const uint64_t FDEAddress =
703 OldEHFrame.getEHFrameAddress() + FDE->getOffset();
704
705 // Add the address if we failed to write it.
706 if (PCToFDE.count(x: FuncAddress) == 0) {
707 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
708 << Twine::utohexstr(FuncAddress) << " is at 0x"
709 << Twine::utohexstr(FDEAddress) << '\n');
710 PCToFDE[FuncAddress] = FDEAddress;
711 }
712 };
713
714 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
715 << llvm::size(OldEHFrame.entries()) << " entries\n");
716
717 // Generate a new .eh_frame_hdr based on the new map.
718
719 // Header plus table of entries of size 8 bytes.
720 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
721
722 // Version is 1.
723 EHFrameHeader[0] = 1;
724 // Encoding of the eh_frame pointer.
725 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
726 // Encoding of the count field to follow.
727 EHFrameHeader[2] = DW_EH_PE_udata4;
728 // Encoding of the table entries - 4-byte offset from the start of the header.
729 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
730
731 // Address of eh_frame. Use the new one.
732 support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
733 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
734
735 // Number of entries in the table (FDE count).
736 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
737
738 // Write the table at offset 12.
739 char *Ptr = EHFrameHeader.data();
740 uint32_t Offset = 12;
741 for (const auto &PCI : PCToFDE) {
742 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
743 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
744 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
745 Offset += 4;
746 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
747 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
748 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
749 Offset += 4;
750 }
751
752 return EHFrameHeader;
753}
754
755Error EHFrameParser::parseCIE(uint64_t StartOffset) {
756 uint8_t Version = Data.getU8(offset_ptr: &Offset);
757 const char *Augmentation = Data.getCStr(OffsetPtr: &Offset);
758 StringRef AugmentationString(Augmentation ? Augmentation : "");
759 uint8_t AddressSize =
760 Version < 4 ? Data.getAddressSize() : Data.getU8(offset_ptr: &Offset);
761 Data.setAddressSize(AddressSize);
762 // Skip segment descriptor size
763 if (Version >= 4)
764 Offset += 1;
765 // Skip code alignment factor
766 Data.getULEB128(offset_ptr: &Offset);
767 // Skip data alignment
768 Data.getSLEB128(OffsetPtr: &Offset);
769 // Skip return address register
770 if (Version == 1)
771 Offset += 1;
772 else
773 Data.getULEB128(offset_ptr: &Offset);
774
775 uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
776 uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
777 // Walk the augmentation string to get all the augmentation data.
778 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
779 switch (AugmentationString[i]) {
780 default:
781 return createStringError(
782 EC: errc::invalid_argument,
783 Fmt: "unknown augmentation character in entry at 0x%" PRIx64, Vals: StartOffset);
784 case 'L':
785 LSDAPointerEncoding = Data.getU8(offset_ptr: &Offset);
786 break;
787 case 'P': {
788 uint32_t PersonalityEncoding = Data.getU8(offset_ptr: &Offset);
789 std::optional<uint64_t> Personality =
790 Data.getEncodedPointer(Offset: &Offset, Encoding: PersonalityEncoding,
791 PCRelOffset: EHFrameAddress ? EHFrameAddress + Offset : 0);
792 // Patch personality address
793 if (Personality)
794 PatcherCallback(*Personality, Offset, PersonalityEncoding);
795 break;
796 }
797 case 'R':
798 FDEPointerEncoding = Data.getU8(offset_ptr: &Offset);
799 break;
800 case 'z':
801 if (i)
802 return createStringError(
803 EC: errc::invalid_argument,
804 Fmt: "'z' must be the first character at 0x%" PRIx64, Vals: StartOffset);
805 // Skip augmentation length
806 Data.getULEB128(offset_ptr: &Offset);
807 break;
808 case 'S':
809 case 'B':
810 break;
811 }
812 }
813 Entries.emplace_back(args: std::make_unique<CIEInfo>(
814 args&: FDEPointerEncoding, args&: LSDAPointerEncoding, args&: AugmentationString));
815 CIEs[StartOffset] = &*Entries.back();
816 return Error::success();
817}
818
819Error EHFrameParser::parseFDE(uint64_t CIEPointer,
820 uint64_t StartStructureOffset) {
821 std::optional<uint64_t> LSDAAddress;
822 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
823
824 // The address size is encoded in the CIE we reference.
825 if (!Cie)
826 return createStringError(EC: errc::invalid_argument,
827 Fmt: "parsing FDE data at 0x%" PRIx64
828 " failed due to missing CIE",
829 Vals: StartStructureOffset);
830 // Patch initial location
831 if (auto Val = Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->FDEPtrEncoding,
832 PCRelOffset: EHFrameAddress + Offset)) {
833 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
834 }
835 // Skip address range
836 Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->FDEPtrEncoding, PCRelOffset: 0);
837
838 // Process augmentation data for this FDE.
839 StringRef AugmentationString = Cie->AugmentationString;
840 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
841 // Skip augmentation length
842 Data.getULEB128(offset_ptr: &Offset);
843 LSDAAddress =
844 Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->LSDAPtrEncoding,
845 PCRelOffset: EHFrameAddress ? Offset + EHFrameAddress : 0);
846 // Patch LSDA address
847 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
848 }
849 return Error::success();
850}
851
852Error EHFrameParser::parse() {
853 while (Data.isValidOffset(offset: Offset)) {
854 const uint64_t StartOffset = Offset;
855
856 uint64_t Length;
857 DwarfFormat Format;
858 std::tie(args&: Length, args&: Format) = Data.getInitialLength(Off: &Offset);
859
860 // If the Length is 0, then this CIE is a terminator
861 if (Length == 0)
862 break;
863
864 const uint64_t StartStructureOffset = Offset;
865 const uint64_t EndStructureOffset = Offset + Length;
866
867 Error Err = Error::success();
868 const uint64_t Id = Data.getRelocatedValue(Size: 4, Off: &Offset,
869 /*SectionIndex=*/nullptr, Err: &Err);
870 if (Err)
871 return Err;
872
873 if (!Id) {
874 if (Error Err = parseCIE(StartOffset))
875 return Err;
876 } else {
877 if (Error Err = parseFDE(CIEPointer: Id, StartStructureOffset))
878 return Err;
879 }
880 Offset = EndStructureOffset;
881 }
882
883 return Error::success();
884}
885
886Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
887 PatcherCallbackTy PatcherCallback) {
888 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
889 return Parser.parse();
890}
891
892} // namespace bolt
893} // namespace llvm
894

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of bolt/lib/Core/Exceptions.cpp