1 | //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements functions for handling C++ exception meta data. |
10 | // |
11 | // Some of the code is taken from examples/ExceptionDemo |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "bolt/Core/Exceptions.h" |
16 | #include "bolt/Core/BinaryFunction.h" |
17 | #include "llvm/ADT/ArrayRef.h" |
18 | #include "llvm/ADT/Twine.h" |
19 | #include "llvm/BinaryFormat/Dwarf.h" |
20 | #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" |
21 | #include "llvm/Support/Casting.h" |
22 | #include "llvm/Support/CommandLine.h" |
23 | #include "llvm/Support/Debug.h" |
24 | #include "llvm/Support/Errc.h" |
25 | #include "llvm/Support/LEB128.h" |
26 | #include "llvm/Support/MathExtras.h" |
27 | #include "llvm/Support/raw_ostream.h" |
28 | #include <map> |
29 | |
30 | #undef DEBUG_TYPE |
31 | #define DEBUG_TYPE "bolt-exceptions" |
32 | |
33 | using namespace llvm::dwarf; |
34 | |
35 | namespace opts { |
36 | |
37 | extern llvm::cl::OptionCategory BoltCategory; |
38 | |
39 | extern llvm::cl::opt<unsigned> Verbosity; |
40 | |
41 | static llvm::cl::opt<bool> |
42 | PrintExceptions("print-exceptions" , |
43 | llvm::cl::desc("print exception handling data" ), |
44 | llvm::cl::Hidden, llvm::cl::cat(BoltCategory)); |
45 | |
46 | } // namespace opts |
47 | |
48 | namespace llvm { |
49 | namespace bolt { |
50 | |
51 | // Read and dump the .gcc_exception_table section entry. |
52 | // |
53 | // .gcc_except_table section contains a set of Language-Specific Data Areas - |
54 | // a fancy name for exception handling tables. There's one LSDA entry per |
55 | // function. However, we can't actually tell which function LSDA refers to |
56 | // unless we parse .eh_frame entry that refers to the LSDA. |
57 | // Then inside LSDA most addresses are encoded relative to the function start, |
58 | // so we need the function context in order to get to real addresses. |
59 | // |
60 | // The best visual representation of the tables comprising LSDA and |
61 | // relationships between them is illustrated at: |
62 | // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf |
63 | // Keep in mind that GCC implementation deviates slightly from that document. |
64 | // |
65 | // To summarize, there are 4 tables in LSDA: call site table, actions table, |
66 | // types table, and types index table (for indirection). The main table contains |
67 | // call site entries. Each call site includes a PC range that can throw an |
68 | // exception, a handler (landing pad), and a reference to an entry in the action |
69 | // table. The handler and/or action could be 0. The action entry is a head |
70 | // of a list of actions associated with a call site. The action table contains |
71 | // all such lists (it could be optimized to share list tails). Each action could |
72 | // be either to catch an exception of a given type, to perform a cleanup, or to |
73 | // propagate the exception after filtering it out (e.g. to make sure function |
74 | // exception specification is not violated). Catch action contains a reference |
75 | // to an entry in the type table, and filter action refers to an entry in the |
76 | // type index table to encode a set of types to filter. |
77 | // |
78 | // Call site table follows LSDA header. Action table immediately follows the |
79 | // call site table. |
80 | // |
81 | // Both types table and type index table start at the same location, but they |
82 | // grow in opposite directions (types go up, indices go down). The beginning of |
83 | // these tables is encoded in LSDA header. Sizes for both of the tables are not |
84 | // included anywhere. |
85 | // |
86 | // We have to parse all of the tables to determine their sizes. Then we have |
87 | // to parse the call site table and associate discovered information with |
88 | // actual call instructions and landing pad blocks. |
89 | // |
90 | // For the purpose of rewriting exception handling tables, we can reuse action, |
91 | // and type index tables in their original binary format. |
92 | // |
93 | // Type table could be encoded using position-independent references, and thus |
94 | // may require relocation. |
95 | // |
96 | // Ideally we should be able to re-write LSDA in-place, without the need to |
97 | // allocate a new space for it. Sadly there's no guarantee that the new call |
98 | // site table will be the same size as GCC uses uleb encodings for PC offsets. |
99 | // |
100 | // Note: some functions have LSDA entries with 0 call site entries. |
101 | Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, |
102 | uint64_t LSDASectionAddress) { |
103 | assert(CurrentState == State::Disassembled && "unexpected function state" ); |
104 | |
105 | if (!getLSDAAddress()) |
106 | return Error::success(); |
107 | |
108 | DWARFDataExtractor Data( |
109 | StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), |
110 | LSDASectionData.size()), |
111 | BC.DwCtx->getDWARFObj().isLittleEndian(), |
112 | BC.DwCtx->getDWARFObj().getAddressSize()); |
113 | uint64_t Offset = getLSDAAddress() - LSDASectionAddress; |
114 | assert(Data.isValidOffset(Offset) && "wrong LSDA address" ); |
115 | |
116 | const uint8_t LPStartEncoding = Data.getU8(offset_ptr: &Offset); |
117 | uint64_t LPStart = Address; |
118 | if (LPStartEncoding != dwarf::DW_EH_PE_omit) { |
119 | std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( |
120 | Offset: &Offset, Encoding: LPStartEncoding, AbsPosOffset: Offset + LSDASectionAddress); |
121 | if (!MaybeLPStart) { |
122 | BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: " |
123 | << (unsigned)LPStartEncoding << '\n'; |
124 | return createFatalBOLTError(S: "" ); |
125 | } |
126 | LPStart = *MaybeLPStart; |
127 | } |
128 | |
129 | const uint8_t TTypeEncoding = Data.getU8(offset_ptr: &Offset); |
130 | LSDATypeEncoding = TTypeEncoding; |
131 | size_t TTypeEncodingSize = 0; |
132 | uintptr_t TTypeEnd = 0; |
133 | if (TTypeEncoding != DW_EH_PE_omit) { |
134 | TTypeEnd = Data.getULEB128(offset_ptr: &Offset); |
135 | TTypeEncodingSize = BC.getDWARFEncodingSize(Encoding: TTypeEncoding); |
136 | } |
137 | |
138 | if (opts::PrintExceptions) { |
139 | BC.outs() << "[LSDA at 0x" << Twine::utohexstr(Val: getLSDAAddress()) |
140 | << " for function " << *this << "]:\n" ; |
141 | BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(Val: LPStartEncoding) |
142 | << '\n'; |
143 | BC.outs() << "LPStart = 0x" << Twine::utohexstr(Val: LPStart) << '\n'; |
144 | BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(Val: TTypeEncoding) |
145 | << '\n'; |
146 | BC.outs() << "TType End = " << TTypeEnd << '\n'; |
147 | } |
148 | |
149 | // Table to store list of indices in type table. Entries are uleb128 values. |
150 | const uint64_t TypeIndexTableStart = Offset + TTypeEnd; |
151 | |
152 | // Offset past the last decoded index. |
153 | uint64_t MaxTypeIndexTableOffset = 0; |
154 | |
155 | // Max positive index used in type table. |
156 | unsigned MaxTypeIndex = 0; |
157 | |
158 | // The actual type info table starts at the same location, but grows in |
159 | // opposite direction. TTypeEncoding is used to encode stored values. |
160 | const uint64_t TypeTableStart = Offset + TTypeEnd; |
161 | |
162 | uint8_t CallSiteEncoding = Data.getU8(offset_ptr: &Offset); |
163 | uint32_t CallSiteTableLength = Data.getULEB128(offset_ptr: &Offset); |
164 | uint64_t CallSiteTableStart = Offset; |
165 | uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; |
166 | uint64_t CallSitePtr = CallSiteTableStart; |
167 | uint64_t ActionTableStart = CallSiteTableEnd; |
168 | |
169 | if (opts::PrintExceptions) { |
170 | BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; |
171 | BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n'; |
172 | BC.outs() << '\n'; |
173 | } |
174 | |
175 | this->HasEHRanges = CallSitePtr < CallSiteTableEnd; |
176 | const uint64_t RangeBase = getAddress(); |
177 | while (CallSitePtr < CallSiteTableEnd) { |
178 | uint64_t Start = *Data.getEncodedPointer(Offset: &CallSitePtr, Encoding: CallSiteEncoding, |
179 | AbsPosOffset: CallSitePtr + LSDASectionAddress); |
180 | uint64_t Length = *Data.getEncodedPointer(Offset: &CallSitePtr, Encoding: CallSiteEncoding, |
181 | AbsPosOffset: CallSitePtr + LSDASectionAddress); |
182 | uint64_t LandingPad = *Data.getEncodedPointer( |
183 | Offset: &CallSitePtr, Encoding: CallSiteEncoding, AbsPosOffset: CallSitePtr + LSDASectionAddress); |
184 | uint64_t ActionEntry = Data.getULEB128(offset_ptr: &CallSitePtr); |
185 | if (LandingPad) |
186 | LandingPad += LPStart; |
187 | |
188 | if (opts::PrintExceptions) { |
189 | BC.outs() << "Call Site: [0x" << Twine::utohexstr(Val: RangeBase + Start) |
190 | << ", 0x" << Twine::utohexstr(Val: RangeBase + Start + Length) |
191 | << "); landing pad: 0x" << Twine::utohexstr(Val: LandingPad) |
192 | << "; action entry: 0x" << Twine::utohexstr(Val: ActionEntry) |
193 | << "\n" ; |
194 | BC.outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) |
195 | << '\n'; |
196 | } |
197 | |
198 | // Create a handler entry if necessary. |
199 | MCSymbol *LPSymbol = nullptr; |
200 | if (LandingPad) { |
201 | // Verify if landing pad code is located outside current function |
202 | // Support landing pad to builtin_unreachable |
203 | if (LandingPad < Address || LandingPad > Address + getSize()) { |
204 | BinaryFunction *Fragment = |
205 | BC.getBinaryFunctionContainingAddress(Address: LandingPad); |
206 | assert(Fragment != nullptr && |
207 | "BOLT-ERROR: cannot find landing pad fragment" ); |
208 | BC.addInterproceduralReference(Function: this, Address: Fragment->getAddress()); |
209 | BC.processInterproceduralReferences(); |
210 | assert(isParentOrChildOf(*Fragment) && |
211 | "BOLT-ERROR: cannot have landing pads in different functions" ); |
212 | setHasIndirectTargetToSplitFragment(true); |
213 | BC.addFragmentsToSkip(Function: this); |
214 | return Error::success(); |
215 | } |
216 | |
217 | const uint64_t LPOffset = LandingPad - getAddress(); |
218 | if (!getInstructionAtOffset(Offset: LPOffset)) { |
219 | if (opts::Verbosity >= 1) |
220 | BC.errs() << "BOLT-WARNING: landing pad " |
221 | << Twine::utohexstr(Val: LPOffset) |
222 | << " not pointing to an instruction in function " << *this |
223 | << " - ignoring.\n" ; |
224 | } else { |
225 | auto Label = Labels.find(x: LPOffset); |
226 | if (Label != Labels.end()) { |
227 | LPSymbol = Label->second; |
228 | } else { |
229 | LPSymbol = BC.Ctx->createNamedTempSymbol(Name: "LP" ); |
230 | Labels[LPOffset] = LPSymbol; |
231 | } |
232 | } |
233 | } |
234 | |
235 | // Mark all call instructions in the range. |
236 | auto II = Instructions.find(x: Start); |
237 | auto IE = Instructions.end(); |
238 | assert(II != IE && "exception range not pointing to an instruction" ); |
239 | do { |
240 | MCInst &Instruction = II->second; |
241 | if (BC.MIB->isCall(Inst: Instruction) && |
242 | !BC.MIB->getConditionalTailCall(Inst: Instruction)) { |
243 | assert(!BC.MIB->isInvoke(Instruction) && |
244 | "overlapping exception ranges detected" ); |
245 | // Add extra operands to a call instruction making it an invoke from |
246 | // now on. |
247 | BC.MIB->addEHInfo(Inst&: Instruction, |
248 | LP: MCPlus::MCLandingPad(LPSymbol, ActionEntry)); |
249 | } |
250 | ++II; |
251 | } while (II != IE && II->first < Start + Length); |
252 | |
253 | if (ActionEntry != 0) { |
254 | auto printType = [&](int Index, raw_ostream &OS) { |
255 | assert(Index > 0 && "only positive indices are valid" ); |
256 | uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; |
257 | const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; |
258 | uint64_t TypeAddress = |
259 | *Data.getEncodedPointer(Offset: &TTEntry, Encoding: TTypeEncoding, AbsPosOffset: TTEntryAddress); |
260 | if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) |
261 | TypeAddress = 0; |
262 | if (TypeAddress == 0) { |
263 | OS << "<all>" ; |
264 | return; |
265 | } |
266 | if (TTypeEncoding & DW_EH_PE_indirect) { |
267 | ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(Address: TypeAddress); |
268 | assert(PointerOrErr && "failed to decode indirect address" ); |
269 | TypeAddress = *PointerOrErr; |
270 | } |
271 | if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(Address: TypeAddress)) |
272 | OS << TypeSymBD->getName(); |
273 | else |
274 | OS << "0x" << Twine::utohexstr(Val: TypeAddress); |
275 | }; |
276 | if (opts::PrintExceptions) |
277 | BC.outs() << " actions: " ; |
278 | uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; |
279 | int64_t ActionType; |
280 | int64_t ActionNext; |
281 | const char *Sep = "" ; |
282 | do { |
283 | ActionType = Data.getSLEB128(OffsetPtr: &ActionPtr); |
284 | const uint32_t Self = ActionPtr; |
285 | ActionNext = Data.getSLEB128(OffsetPtr: &ActionPtr); |
286 | if (opts::PrintExceptions) |
287 | BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") " ; |
288 | if (ActionType == 0) { |
289 | if (opts::PrintExceptions) |
290 | BC.outs() << "cleanup" ; |
291 | } else if (ActionType > 0) { |
292 | // It's an index into a type table. |
293 | MaxTypeIndex = |
294 | std::max(a: MaxTypeIndex, b: static_cast<unsigned>(ActionType)); |
295 | if (opts::PrintExceptions) { |
296 | BC.outs() << "catch type " ; |
297 | printType(ActionType, BC.outs()); |
298 | } |
299 | } else { // ActionType < 0 |
300 | if (opts::PrintExceptions) |
301 | BC.outs() << "filter exception types " ; |
302 | const char *TSep = "" ; |
303 | // ActionType is a negative *byte* offset into *uleb128-encoded* table |
304 | // of indices with base 1. |
305 | // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are |
306 | // encoded using uleb128 thus we cannot directly dereference them. |
307 | uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; |
308 | while (uint64_t Index = Data.getULEB128(offset_ptr: &TypeIndexTablePtr)) { |
309 | MaxTypeIndex = std::max(a: MaxTypeIndex, b: static_cast<unsigned>(Index)); |
310 | if (opts::PrintExceptions) { |
311 | BC.outs() << TSep; |
312 | printType(Index, BC.outs()); |
313 | TSep = ", " ; |
314 | } |
315 | } |
316 | MaxTypeIndexTableOffset = std::max( |
317 | a: MaxTypeIndexTableOffset, b: TypeIndexTablePtr - TypeIndexTableStart); |
318 | } |
319 | |
320 | Sep = "; " ; |
321 | |
322 | ActionPtr = Self + ActionNext; |
323 | } while (ActionNext); |
324 | if (opts::PrintExceptions) |
325 | BC.outs() << '\n'; |
326 | } |
327 | } |
328 | if (opts::PrintExceptions) |
329 | BC.outs() << '\n'; |
330 | |
331 | assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= |
332 | Data.getData().size() && |
333 | "LSDA entry has crossed section boundary" ); |
334 | |
335 | if (TTypeEnd) { |
336 | LSDAActionTable = LSDASectionData.slice( |
337 | N: ActionTableStart, M: TypeIndexTableStart - |
338 | MaxTypeIndex * TTypeEncodingSize - |
339 | ActionTableStart); |
340 | for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { |
341 | uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; |
342 | const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; |
343 | uint64_t TypeAddress = |
344 | *Data.getEncodedPointer(Offset: &TTEntry, Encoding: TTypeEncoding, AbsPosOffset: TTEntryAddress); |
345 | if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) |
346 | TypeAddress = 0; |
347 | if (TTypeEncoding & DW_EH_PE_indirect) { |
348 | LSDATypeAddressTable.emplace_back(Args&: TypeAddress); |
349 | if (TypeAddress) { |
350 | ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(Address: TypeAddress); |
351 | assert(PointerOrErr && "failed to decode indirect address" ); |
352 | TypeAddress = *PointerOrErr; |
353 | } |
354 | } |
355 | LSDATypeTable.emplace_back(Args&: TypeAddress); |
356 | } |
357 | LSDATypeIndexTable = |
358 | LSDASectionData.slice(N: TypeIndexTableStart, M: MaxTypeIndexTableOffset); |
359 | } |
360 | return Error::success(); |
361 | } |
362 | |
363 | void BinaryFunction::updateEHRanges() { |
364 | if (getSize() == 0) |
365 | return; |
366 | |
367 | assert(CurrentState == State::CFG_Finalized && "unexpected state" ); |
368 | |
369 | // Build call sites table. |
370 | struct EHInfo { |
371 | const MCSymbol *LP; // landing pad |
372 | uint64_t Action; |
373 | }; |
374 | |
375 | // Sites to update. |
376 | CallSitesList Sites; |
377 | |
378 | for (FunctionFragment &FF : getLayout().fragments()) { |
379 | // If previous call can throw, this is its exception handler. |
380 | EHInfo PreviousEH = {.LP: nullptr, .Action: 0}; |
381 | |
382 | // Marker for the beginning of exceptions range. |
383 | const MCSymbol *StartRange = nullptr; |
384 | |
385 | for (BinaryBasicBlock *const BB : FF) { |
386 | for (MCInst &Instr : *BB) { |
387 | if (!BC.MIB->isCall(Inst: Instr)) |
388 | continue; |
389 | |
390 | // Instruction can throw an exception that should be handled. |
391 | const bool Throws = BC.MIB->isInvoke(Inst: Instr); |
392 | |
393 | // Ignore the call if it's a continuation of a no-throw gap. |
394 | if (!Throws && !StartRange) |
395 | continue; |
396 | |
397 | // Extract exception handling information from the instruction. |
398 | const MCSymbol *LP = nullptr; |
399 | uint64_t Action = 0; |
400 | if (const std::optional<MCPlus::MCLandingPad> EHInfo = |
401 | BC.MIB->getEHInfo(Inst: Instr)) |
402 | std::tie(args&: LP, args&: Action) = *EHInfo; |
403 | |
404 | // No action if the exception handler has not changed. |
405 | if (Throws && StartRange && PreviousEH.LP == LP && |
406 | PreviousEH.Action == Action) |
407 | continue; |
408 | |
409 | // Same symbol is used for the beginning and the end of the range. |
410 | MCSymbol *EHSymbol; |
411 | if (MCSymbol *InstrLabel = BC.MIB->getInstLabel(Inst: Instr)) { |
412 | EHSymbol = InstrLabel; |
413 | } else { |
414 | std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex); |
415 | EHSymbol = BC.MIB->getOrCreateInstLabel(Inst&: Instr, Name: "EH" , Ctx: BC.Ctx.get()); |
416 | } |
417 | |
418 | // At this point we could be in one of the following states: |
419 | // |
420 | // I. Exception handler has changed and we need to close previous range |
421 | // and start a new one. |
422 | // |
423 | // II. Start a new exception range after the gap. |
424 | // |
425 | // III. Close current exception range and start a new gap. |
426 | const MCSymbol *EndRange; |
427 | if (StartRange) { |
428 | // I, III: |
429 | EndRange = EHSymbol; |
430 | } else { |
431 | // II: |
432 | StartRange = EHSymbol; |
433 | EndRange = nullptr; |
434 | } |
435 | |
436 | // Close the previous range. |
437 | if (EndRange) |
438 | Sites.emplace_back( |
439 | Args: FF.getFragmentNum(), |
440 | Args: CallSite{.Start: StartRange, .End: EndRange, .LP: PreviousEH.LP, .Action: PreviousEH.Action}); |
441 | |
442 | if (Throws) { |
443 | // I, II: |
444 | StartRange = EHSymbol; |
445 | PreviousEH = EHInfo{.LP: LP, .Action: Action}; |
446 | } else { |
447 | StartRange = nullptr; |
448 | } |
449 | } |
450 | } |
451 | |
452 | // Check if we need to close the range. |
453 | if (StartRange) { |
454 | const MCSymbol *EndRange = getFunctionEndLabel(Fragment: FF.getFragmentNum()); |
455 | Sites.emplace_back( |
456 | Args: FF.getFragmentNum(), |
457 | Args: CallSite{.Start: StartRange, .End: EndRange, .LP: PreviousEH.LP, .Action: PreviousEH.Action}); |
458 | } |
459 | } |
460 | |
461 | addCallSites(NewCallSites: Sites); |
462 | } |
463 | |
// Mask selecting the two most significant bits of a CFI opcode byte; the CFI
// decoder in this file uses it to reduce an opcode to its primary-opcode part.
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xC0;
465 | |
466 | CFIReaderWriter::CFIReaderWriter(BinaryContext &BC, |
467 | const DWARFDebugFrame &EHFrame) |
468 | : BC(BC) { |
469 | // Prepare FDEs for fast lookup |
470 | for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { |
471 | const auto *CurFDE = dyn_cast<dwarf::FDE>(Val: &Entry); |
472 | // Skip CIEs. |
473 | if (!CurFDE) |
474 | continue; |
475 | // There could me multiple FDEs with the same initial address, and perhaps |
476 | // different sizes (address ranges). Use the first entry with non-zero size. |
477 | auto FDEI = FDEs.lower_bound(x: CurFDE->getInitialLocation()); |
478 | if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { |
479 | if (CurFDE->getAddressRange()) { |
480 | if (FDEI->second->getAddressRange() == 0) { |
481 | FDEI->second = CurFDE; |
482 | } else if (opts::Verbosity > 0) { |
483 | BC.errs() << "BOLT-WARNING: different FDEs for function at 0x" |
484 | << Twine::utohexstr(Val: FDEI->first) |
485 | << " detected; sizes: " << FDEI->second->getAddressRange() |
486 | << " and " << CurFDE->getAddressRange() << '\n'; |
487 | } |
488 | } |
489 | } else { |
490 | FDEs.emplace_hint(pos: FDEI, args: CurFDE->getInitialLocation(), args&: CurFDE); |
491 | } |
492 | } |
493 | } |
494 | |
495 | bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { |
496 | uint64_t Address = Function.getAddress(); |
497 | auto I = FDEs.find(x: Address); |
498 | // Ignore zero-length FDE ranges. |
499 | if (I == FDEs.end() || !I->second->getAddressRange()) |
500 | return true; |
501 | |
502 | const FDE &CurFDE = *I->second; |
503 | std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); |
504 | Function.setLSDAAddress(LSDA ? *LSDA : 0); |
505 | |
506 | uint64_t Offset = Function.getFirstInstructionOffset(); |
507 | uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); |
508 | uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); |
509 | if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { |
510 | Function.setPersonalityFunction( |
511 | *CurFDE.getLinkedCIE()->getPersonalityAddress()); |
512 | Function.setPersonalityEncoding( |
513 | *CurFDE.getLinkedCIE()->getPersonalityEncoding()); |
514 | } |
515 | |
516 | auto decodeFrameInstruction = [this, &Function, &Offset, Address, |
517 | CodeAlignment, DataAlignment]( |
518 | const CFIProgram::Instruction &Instr) { |
519 | uint8_t Opcode = Instr.Opcode; |
520 | if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) |
521 | Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; |
522 | switch (Instr.Opcode) { |
523 | case DW_CFA_nop: |
524 | break; |
525 | case DW_CFA_advance_loc4: |
526 | case DW_CFA_advance_loc2: |
527 | case DW_CFA_advance_loc1: |
528 | case DW_CFA_advance_loc: |
529 | // Advance our current address |
530 | Offset += CodeAlignment * int64_t(Instr.Ops[0]); |
531 | break; |
532 | case DW_CFA_offset_extended_sf: |
533 | Function.addCFIInstruction( |
534 | Offset, |
535 | Inst: MCCFIInstruction::createOffset( |
536 | L: nullptr, Register: Instr.Ops[0], Offset: DataAlignment * int64_t(Instr.Ops[1]))); |
537 | break; |
538 | case DW_CFA_offset_extended: |
539 | case DW_CFA_offset: |
540 | Function.addCFIInstruction( |
541 | Offset, Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Instr.Ops[0], |
542 | Offset: DataAlignment * Instr.Ops[1])); |
543 | break; |
544 | case DW_CFA_restore_extended: |
545 | case DW_CFA_restore: |
546 | Function.addCFIInstruction( |
547 | Offset, Inst: MCCFIInstruction::createRestore(L: nullptr, Register: Instr.Ops[0])); |
548 | break; |
549 | case DW_CFA_set_loc: |
550 | assert(Instr.Ops[0] >= Address && "set_loc out of function bounds" ); |
551 | assert(Instr.Ops[0] <= Address + Function.getSize() && |
552 | "set_loc out of function bounds" ); |
553 | Offset = Instr.Ops[0] - Address; |
554 | break; |
555 | |
556 | case DW_CFA_undefined: |
557 | Function.addCFIInstruction( |
558 | Offset, Inst: MCCFIInstruction::createUndefined(L: nullptr, Register: Instr.Ops[0])); |
559 | break; |
560 | case DW_CFA_same_value: |
561 | Function.addCFIInstruction( |
562 | Offset, Inst: MCCFIInstruction::createSameValue(L: nullptr, Register: Instr.Ops[0])); |
563 | break; |
564 | case DW_CFA_register: |
565 | Function.addCFIInstruction( |
566 | Offset, Inst: MCCFIInstruction::createRegister(L: nullptr, Register1: Instr.Ops[0], |
567 | Register2: Instr.Ops[1])); |
568 | break; |
569 | case DW_CFA_remember_state: |
570 | Function.addCFIInstruction( |
571 | Offset, Inst: MCCFIInstruction::createRememberState(L: nullptr)); |
572 | break; |
573 | case DW_CFA_restore_state: |
574 | Function.addCFIInstruction(Offset, |
575 | Inst: MCCFIInstruction::createRestoreState(L: nullptr)); |
576 | break; |
577 | case DW_CFA_def_cfa: |
578 | Function.addCFIInstruction( |
579 | Offset, |
580 | Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: Instr.Ops[0], Offset: Instr.Ops[1])); |
581 | break; |
582 | case DW_CFA_def_cfa_sf: |
583 | Function.addCFIInstruction( |
584 | Offset, |
585 | Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: Instr.Ops[0], |
586 | Offset: DataAlignment * int64_t(Instr.Ops[1]))); |
587 | break; |
588 | case DW_CFA_def_cfa_register: |
589 | Function.addCFIInstruction(Offset, Inst: MCCFIInstruction::createDefCfaRegister( |
590 | L: nullptr, Register: Instr.Ops[0])); |
591 | break; |
592 | case DW_CFA_def_cfa_offset: |
593 | Function.addCFIInstruction( |
594 | Offset, Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: Instr.Ops[0])); |
595 | break; |
596 | case DW_CFA_def_cfa_offset_sf: |
597 | Function.addCFIInstruction( |
598 | Offset, Inst: MCCFIInstruction::cfiDefCfaOffset( |
599 | L: nullptr, Offset: DataAlignment * int64_t(Instr.Ops[0]))); |
600 | break; |
601 | case DW_CFA_GNU_args_size: |
602 | Function.addCFIInstruction( |
603 | Offset, Inst: MCCFIInstruction::createGnuArgsSize(L: nullptr, Size: Instr.Ops[0])); |
604 | Function.setUsesGnuArgsSize(); |
605 | break; |
606 | case DW_CFA_val_offset_sf: |
607 | case DW_CFA_val_offset: |
608 | if (opts::Verbosity >= 1) { |
609 | BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n" ; |
610 | } |
611 | return false; |
612 | case DW_CFA_def_cfa_expression: |
613 | case DW_CFA_val_expression: |
614 | case DW_CFA_expression: { |
615 | StringRef ExprBytes = Instr.Expression->getData(); |
616 | std::string Str; |
617 | raw_string_ostream OS(Str); |
618 | // Manually encode this instruction using CFI escape |
619 | OS << Opcode; |
620 | if (Opcode != DW_CFA_def_cfa_expression) |
621 | encodeULEB128(Value: Instr.Ops[0], OS); |
622 | encodeULEB128(Value: ExprBytes.size(), OS); |
623 | OS << ExprBytes; |
624 | Function.addCFIInstruction( |
625 | Offset, Inst: MCCFIInstruction::createEscape(L: nullptr, Vals: OS.str())); |
626 | break; |
627 | } |
628 | case DW_CFA_MIPS_advance_loc8: |
629 | if (opts::Verbosity >= 1) |
630 | BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n" ; |
631 | return false; |
632 | case DW_CFA_GNU_window_save: |
633 | // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same |
634 | // id but mean different things. The latter is used in AArch64. |
635 | if (Function.getBinaryContext().isAArch64()) { |
636 | Function.addCFIInstruction( |
637 | Offset, Inst: MCCFIInstruction::createNegateRAState(L: nullptr)); |
638 | break; |
639 | } |
640 | if (opts::Verbosity >= 1) |
641 | BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n" ; |
642 | return false; |
643 | case DW_CFA_lo_user: |
644 | case DW_CFA_hi_user: |
645 | if (opts::Verbosity >= 1) |
646 | BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n" ; |
647 | return false; |
648 | default: |
649 | if (opts::Verbosity >= 1) |
650 | BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: " |
651 | << Instr.Opcode << '\n'; |
652 | return false; |
653 | } |
654 | |
655 | return true; |
656 | }; |
657 | |
658 | for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) |
659 | if (!decodeFrameInstruction(Instr)) |
660 | return false; |
661 | |
662 | for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) |
663 | if (!decodeFrameInstruction(Instr)) |
664 | return false; |
665 | |
666 | return true; |
667 | } |
668 | |
669 | std::vector<char> CFIReaderWriter::( |
670 | const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame, |
671 | uint64_t , |
672 | std::vector<uint64_t> &FailedAddresses) const { |
673 | // Common PC -> FDE map to be written into .eh_frame_hdr. |
674 | std::map<uint64_t, uint64_t> PCToFDE; |
675 | |
676 | // Presort array for binary search. |
677 | llvm::sort(C&: FailedAddresses); |
678 | |
679 | // Initialize PCToFDE using NewEHFrame. |
680 | for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { |
681 | const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(Val: &Entry); |
682 | if (FDE == nullptr) |
683 | continue; |
684 | const uint64_t FuncAddress = FDE->getInitialLocation(); |
685 | const uint64_t FDEAddress = |
686 | NewEHFrame.getEHFrameAddress() + FDE->getOffset(); |
687 | |
688 | // Ignore unused FDEs. |
689 | if (FuncAddress == 0) |
690 | continue; |
691 | |
692 | // Add the address to the map unless we failed to write it. |
693 | if (!std::binary_search(first: FailedAddresses.begin(), last: FailedAddresses.end(), |
694 | val: FuncAddress)) { |
695 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x" |
696 | << Twine::utohexstr(FuncAddress) << " is at 0x" |
697 | << Twine::utohexstr(FDEAddress) << '\n'); |
698 | PCToFDE[FuncAddress] = FDEAddress; |
699 | } |
700 | }; |
701 | |
702 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " |
703 | << llvm::size(NewEHFrame.entries()) << " entries\n" ); |
704 | |
705 | // Add entries from the original .eh_frame corresponding to the functions |
706 | // that we did not update. |
707 | for (const dwarf::FrameEntry &Entry : OldEHFrame) { |
708 | const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(Val: &Entry); |
709 | if (FDE == nullptr) |
710 | continue; |
711 | const uint64_t FuncAddress = FDE->getInitialLocation(); |
712 | const uint64_t FDEAddress = |
713 | OldEHFrame.getEHFrameAddress() + FDE->getOffset(); |
714 | |
715 | // Add the address if we failed to write it. |
716 | if (PCToFDE.count(x: FuncAddress) == 0) { |
717 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" |
718 | << Twine::utohexstr(FuncAddress) << " is at 0x" |
719 | << Twine::utohexstr(FDEAddress) << '\n'); |
720 | PCToFDE[FuncAddress] = FDEAddress; |
721 | } |
722 | }; |
723 | |
724 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " |
725 | << llvm::size(OldEHFrame.entries()) << " entries\n" ); |
726 | |
727 | // Generate a new .eh_frame_hdr based on the new map. |
728 | |
729 | // Header plus table of entries of size 8 bytes. |
730 | std::vector<char> (12 + PCToFDE.size() * 8); |
731 | |
732 | // Version is 1. |
733 | EHFrameHeader[0] = 1; |
734 | // Encoding of the eh_frame pointer. |
735 | EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; |
736 | // Encoding of the count field to follow. |
737 | EHFrameHeader[2] = DW_EH_PE_udata4; |
738 | // Encoding of the table entries - 4-byte offset from the start of the header. |
739 | EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; |
740 | |
741 | // Address of eh_frame. Use the new one. |
742 | support::ulittle32_t::ref(EHFrameHeader.data() + 4) = |
743 | NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); |
744 | |
745 | // Number of entries in the table (FDE count). |
746 | support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); |
747 | |
748 | // Write the table at offset 12. |
749 | char *Ptr = EHFrameHeader.data(); |
750 | uint32_t Offset = 12; |
751 | for (const auto &PCI : PCToFDE) { |
752 | int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; |
753 | assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds" ); |
754 | support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; |
755 | Offset += 4; |
756 | int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; |
757 | assert(isInt<32>(FDEOffset) && "FDE offset out of bounds" ); |
758 | support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; |
759 | Offset += 4; |
760 | } |
761 | |
762 | return EHFrameHeader; |
763 | } |
764 | |
765 | Error EHFrameParser::parseCIE(uint64_t StartOffset) { |
766 | uint8_t Version = Data.getU8(offset_ptr: &Offset); |
767 | const char *Augmentation = Data.getCStr(OffsetPtr: &Offset); |
768 | StringRef AugmentationString(Augmentation ? Augmentation : "" ); |
769 | uint8_t AddressSize = |
770 | Version < 4 ? Data.getAddressSize() : Data.getU8(offset_ptr: &Offset); |
771 | Data.setAddressSize(AddressSize); |
772 | // Skip segment descriptor size |
773 | if (Version >= 4) |
774 | Offset += 1; |
775 | // Skip code alignment factor |
776 | Data.getULEB128(offset_ptr: &Offset); |
777 | // Skip data alignment |
778 | Data.getSLEB128(OffsetPtr: &Offset); |
779 | // Skip return address register |
780 | if (Version == 1) |
781 | Offset += 1; |
782 | else |
783 | Data.getULEB128(offset_ptr: &Offset); |
784 | |
785 | uint32_t FDEPointerEncoding = DW_EH_PE_absptr; |
786 | uint32_t LSDAPointerEncoding = DW_EH_PE_omit; |
787 | // Walk the augmentation string to get all the augmentation data. |
788 | for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { |
789 | switch (AugmentationString[i]) { |
790 | default: |
791 | return createStringError( |
792 | EC: errc::invalid_argument, |
793 | Fmt: "unknown augmentation character in entry at 0x%" PRIx64, Vals: StartOffset); |
794 | case 'L': |
795 | LSDAPointerEncoding = Data.getU8(offset_ptr: &Offset); |
796 | break; |
797 | case 'P': { |
798 | uint32_t PersonalityEncoding = Data.getU8(offset_ptr: &Offset); |
799 | std::optional<uint64_t> Personality = |
800 | Data.getEncodedPointer(Offset: &Offset, Encoding: PersonalityEncoding, |
801 | AbsPosOffset: EHFrameAddress ? EHFrameAddress + Offset : 0); |
802 | // Patch personality address |
803 | if (Personality) |
804 | PatcherCallback(*Personality, Offset, PersonalityEncoding); |
805 | break; |
806 | } |
807 | case 'R': |
808 | FDEPointerEncoding = Data.getU8(offset_ptr: &Offset); |
809 | break; |
810 | case 'z': |
811 | if (i) |
812 | return createStringError( |
813 | EC: errc::invalid_argument, |
814 | Fmt: "'z' must be the first character at 0x%" PRIx64, Vals: StartOffset); |
815 | // Skip augmentation length |
816 | Data.getULEB128(offset_ptr: &Offset); |
817 | break; |
818 | case 'S': |
819 | case 'B': |
820 | break; |
821 | } |
822 | } |
823 | Entries.emplace_back(args: std::make_unique<CIEInfo>( |
824 | args&: FDEPointerEncoding, args&: LSDAPointerEncoding, args&: AugmentationString)); |
825 | CIEs[StartOffset] = &*Entries.back(); |
826 | return Error::success(); |
827 | } |
828 | |
829 | Error EHFrameParser::parseFDE(uint64_t CIEPointer, |
830 | uint64_t StartStructureOffset) { |
831 | std::optional<uint64_t> LSDAAddress; |
832 | CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; |
833 | |
834 | // The address size is encoded in the CIE we reference. |
835 | if (!Cie) |
836 | return createStringError(EC: errc::invalid_argument, |
837 | Fmt: "parsing FDE data at 0x%" PRIx64 |
838 | " failed due to missing CIE" , |
839 | Vals: StartStructureOffset); |
840 | // Patch initial location |
841 | if (auto Val = Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->FDEPtrEncoding, |
842 | AbsPosOffset: EHFrameAddress + Offset)) { |
843 | PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); |
844 | } |
845 | // Skip address range |
846 | Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->FDEPtrEncoding, AbsPosOffset: 0); |
847 | |
848 | // Process augmentation data for this FDE. |
849 | StringRef AugmentationString = Cie->AugmentationString; |
850 | if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { |
851 | // Skip augmentation length |
852 | Data.getULEB128(offset_ptr: &Offset); |
853 | LSDAAddress = |
854 | Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->LSDAPtrEncoding, |
855 | AbsPosOffset: EHFrameAddress ? Offset + EHFrameAddress : 0); |
856 | // Patch LSDA address |
857 | PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); |
858 | } |
859 | return Error::success(); |
860 | } |
861 | |
862 | Error EHFrameParser::parse() { |
863 | while (Data.isValidOffset(offset: Offset)) { |
864 | const uint64_t StartOffset = Offset; |
865 | |
866 | uint64_t Length; |
867 | DwarfFormat Format; |
868 | std::tie(args&: Length, args&: Format) = Data.getInitialLength(Off: &Offset); |
869 | |
870 | // If the Length is 0, then this CIE is a terminator |
871 | if (Length == 0) |
872 | break; |
873 | |
874 | const uint64_t StartStructureOffset = Offset; |
875 | const uint64_t EndStructureOffset = Offset + Length; |
876 | |
877 | Error Err = Error::success(); |
878 | const uint64_t Id = Data.getRelocatedValue(Size: 4, Off: &Offset, |
879 | /*SectionIndex=*/nullptr, Err: &Err); |
880 | if (Err) |
881 | return Err; |
882 | |
883 | if (!Id) { |
884 | if (Error Err = parseCIE(StartOffset)) |
885 | return Err; |
886 | } else { |
887 | if (Error Err = parseFDE(CIEPointer: Id, StartStructureOffset)) |
888 | return Err; |
889 | } |
890 | Offset = EndStructureOffset; |
891 | } |
892 | |
893 | return Error::success(); |
894 | } |
895 | |
896 | Error EHFrameParser::(DWARFDataExtractor Data, uint64_t EHFrameAddress, |
897 | PatcherCallbackTy PatcherCallback) { |
898 | EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); |
899 | return Parser.parse(); |
900 | } |
901 | |
902 | } // namespace bolt |
903 | } // namespace llvm |
904 | |