1 | //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements functions for handling C++ exception meta data. |
10 | // |
11 | // Some of the code is taken from examples/ExceptionDemo |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "bolt/Core/Exceptions.h" |
16 | #include "bolt/Core/BinaryFunction.h" |
17 | #include "llvm/ADT/ArrayRef.h" |
18 | #include "llvm/ADT/Twine.h" |
19 | #include "llvm/BinaryFormat/Dwarf.h" |
20 | #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" |
21 | #include "llvm/Support/Casting.h" |
22 | #include "llvm/Support/CommandLine.h" |
23 | #include "llvm/Support/Debug.h" |
24 | #include "llvm/Support/Errc.h" |
25 | #include "llvm/Support/LEB128.h" |
26 | #include "llvm/Support/MathExtras.h" |
27 | #include "llvm/Support/raw_ostream.h" |
28 | #include <map> |
29 | |
30 | #undef DEBUG_TYPE |
31 | #define DEBUG_TYPE "bolt-exceptions" |
32 | |
33 | using namespace llvm::dwarf; |
34 | |
35 | namespace opts { |
36 | |
37 | extern llvm::cl::OptionCategory BoltCategory; |
38 | |
39 | extern llvm::cl::opt<unsigned> Verbosity; |
40 | |
41 | static llvm::cl::opt<bool> |
42 | PrintExceptions("print-exceptions" , |
43 | llvm::cl::desc("print exception handling data" ), |
44 | llvm::cl::Hidden, llvm::cl::cat(BoltCategory)); |
45 | |
46 | } // namespace opts |
47 | |
48 | namespace llvm { |
49 | namespace bolt { |
50 | |
// Read and dump the .gcc_except_table section entry.
52 | // |
53 | // .gcc_except_table section contains a set of Language-Specific Data Areas - |
54 | // a fancy name for exception handling tables. There's one LSDA entry per |
55 | // function. However, we can't actually tell which function LSDA refers to |
56 | // unless we parse .eh_frame entry that refers to the LSDA. |
57 | // Then inside LSDA most addresses are encoded relative to the function start, |
58 | // so we need the function context in order to get to real addresses. |
59 | // |
60 | // The best visual representation of the tables comprising LSDA and |
61 | // relationships between them is illustrated at: |
62 | // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf |
63 | // Keep in mind that GCC implementation deviates slightly from that document. |
64 | // |
65 | // To summarize, there are 4 tables in LSDA: call site table, actions table, |
66 | // types table, and types index table (for indirection). The main table contains |
67 | // call site entries. Each call site includes a PC range that can throw an |
68 | // exception, a handler (landing pad), and a reference to an entry in the action |
69 | // table. The handler and/or action could be 0. The action entry is a head |
70 | // of a list of actions associated with a call site. The action table contains |
71 | // all such lists (it could be optimized to share list tails). Each action could |
72 | // be either to catch an exception of a given type, to perform a cleanup, or to |
73 | // propagate the exception after filtering it out (e.g. to make sure function |
74 | // exception specification is not violated). Catch action contains a reference |
75 | // to an entry in the type table, and filter action refers to an entry in the |
76 | // type index table to encode a set of types to filter. |
77 | // |
78 | // Call site table follows LSDA header. Action table immediately follows the |
79 | // call site table. |
80 | // |
81 | // Both types table and type index table start at the same location, but they |
82 | // grow in opposite directions (types go up, indices go down). The beginning of |
83 | // these tables is encoded in LSDA header. Sizes for both of the tables are not |
84 | // included anywhere. |
85 | // |
86 | // We have to parse all of the tables to determine their sizes. Then we have |
87 | // to parse the call site table and associate discovered information with |
88 | // actual call instructions and landing pad blocks. |
89 | // |
90 | // For the purpose of rewriting exception handling tables, we can reuse action, |
91 | // and type index tables in their original binary format. |
92 | // |
93 | // Type table could be encoded using position-independent references, and thus |
94 | // may require relocation. |
95 | // |
96 | // Ideally we should be able to re-write LSDA in-place, without the need to |
97 | // allocate a new space for it. Sadly there's no guarantee that the new call |
98 | // site table will be the same size as GCC uses uleb encodings for PC offsets. |
99 | // |
100 | // Note: some functions have LSDA entries with 0 call site entries. |
101 | Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, |
102 | uint64_t LSDASectionAddress) { |
103 | assert(CurrentState == State::Disassembled && "unexpected function state" ); |
104 | |
105 | if (!getLSDAAddress()) |
106 | return Error::success(); |
107 | |
108 | DWARFDataExtractor Data( |
109 | StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), |
110 | LSDASectionData.size()), |
111 | BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); |
112 | uint64_t Offset = getLSDAAddress() - LSDASectionAddress; |
113 | assert(Data.isValidOffset(Offset) && "wrong LSDA address" ); |
114 | |
115 | const uint8_t LPStartEncoding = Data.getU8(offset_ptr: &Offset); |
116 | uint64_t LPStart = Address; |
117 | if (LPStartEncoding != dwarf::DW_EH_PE_omit) { |
118 | std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( |
119 | Offset: &Offset, Encoding: LPStartEncoding, PCRelOffset: Offset + LSDASectionAddress); |
120 | if (!MaybeLPStart) { |
121 | BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: " |
122 | << (unsigned)LPStartEncoding << '\n'; |
123 | return createFatalBOLTError(S: "" ); |
124 | } |
125 | LPStart = *MaybeLPStart; |
126 | } |
127 | |
128 | const uint8_t TTypeEncoding = Data.getU8(offset_ptr: &Offset); |
129 | LSDATypeEncoding = TTypeEncoding; |
130 | size_t TTypeEncodingSize = 0; |
131 | uintptr_t TTypeEnd = 0; |
132 | if (TTypeEncoding != DW_EH_PE_omit) { |
133 | TTypeEnd = Data.getULEB128(offset_ptr: &Offset); |
134 | TTypeEncodingSize = BC.getDWARFEncodingSize(Encoding: TTypeEncoding); |
135 | } |
136 | |
137 | if (opts::PrintExceptions) { |
138 | BC.outs() << "[LSDA at 0x" << Twine::utohexstr(Val: getLSDAAddress()) |
139 | << " for function " << *this << "]:\n" ; |
140 | BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(Val: LPStartEncoding) |
141 | << '\n'; |
142 | BC.outs() << "LPStart = 0x" << Twine::utohexstr(Val: LPStart) << '\n'; |
143 | BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(Val: TTypeEncoding) |
144 | << '\n'; |
145 | BC.outs() << "TType End = " << TTypeEnd << '\n'; |
146 | } |
147 | |
148 | // Table to store list of indices in type table. Entries are uleb128 values. |
149 | const uint64_t TypeIndexTableStart = Offset + TTypeEnd; |
150 | |
151 | // Offset past the last decoded index. |
152 | uint64_t MaxTypeIndexTableOffset = 0; |
153 | |
154 | // Max positive index used in type table. |
155 | unsigned MaxTypeIndex = 0; |
156 | |
157 | // The actual type info table starts at the same location, but grows in |
158 | // opposite direction. TTypeEncoding is used to encode stored values. |
159 | const uint64_t TypeTableStart = Offset + TTypeEnd; |
160 | |
161 | uint8_t CallSiteEncoding = Data.getU8(offset_ptr: &Offset); |
162 | uint32_t CallSiteTableLength = Data.getULEB128(offset_ptr: &Offset); |
163 | uint64_t CallSiteTableStart = Offset; |
164 | uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; |
165 | uint64_t CallSitePtr = CallSiteTableStart; |
166 | uint64_t ActionTableStart = CallSiteTableEnd; |
167 | |
168 | if (opts::PrintExceptions) { |
169 | BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; |
170 | BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n'; |
171 | BC.outs() << '\n'; |
172 | } |
173 | |
174 | this->HasEHRanges = CallSitePtr < CallSiteTableEnd; |
175 | const uint64_t RangeBase = getAddress(); |
176 | while (CallSitePtr < CallSiteTableEnd) { |
177 | uint64_t Start = *Data.getEncodedPointer(Offset: &CallSitePtr, Encoding: CallSiteEncoding, |
178 | PCRelOffset: CallSitePtr + LSDASectionAddress); |
179 | uint64_t Length = *Data.getEncodedPointer(Offset: &CallSitePtr, Encoding: CallSiteEncoding, |
180 | PCRelOffset: CallSitePtr + LSDASectionAddress); |
181 | uint64_t LandingPad = *Data.getEncodedPointer( |
182 | Offset: &CallSitePtr, Encoding: CallSiteEncoding, PCRelOffset: CallSitePtr + LSDASectionAddress); |
183 | uint64_t ActionEntry = Data.getULEB128(offset_ptr: &CallSitePtr); |
184 | if (LandingPad) |
185 | LandingPad += LPStart; |
186 | |
187 | if (opts::PrintExceptions) { |
188 | BC.outs() << "Call Site: [0x" << Twine::utohexstr(Val: RangeBase + Start) |
189 | << ", 0x" << Twine::utohexstr(Val: RangeBase + Start + Length) |
190 | << "); landing pad: 0x" << Twine::utohexstr(Val: LandingPad) |
191 | << "; action entry: 0x" << Twine::utohexstr(Val: ActionEntry) |
192 | << "\n" ; |
193 | BC.outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) |
194 | << '\n'; |
195 | } |
196 | |
197 | // Create a handler entry if necessary. |
198 | MCSymbol *LPSymbol = nullptr; |
199 | if (LandingPad) { |
200 | // Verify if landing pad code is located outside current function |
201 | // Support landing pad to builtin_unreachable |
202 | if (LandingPad < Address || LandingPad > Address + getSize()) { |
203 | BinaryFunction *Fragment = |
204 | BC.getBinaryFunctionContainingAddress(Address: LandingPad); |
205 | assert(Fragment != nullptr && |
206 | "BOLT-ERROR: cannot find landing pad fragment" ); |
207 | BC.addInterproceduralReference(Function: this, Address: Fragment->getAddress()); |
208 | BC.processInterproceduralReferences(); |
209 | assert(BC.areRelatedFragments(this, Fragment) && |
210 | "BOLT-ERROR: cannot have landing pads in different functions" ); |
211 | setHasIndirectTargetToSplitFragment(true); |
212 | BC.addFragmentsToSkip(Function: this); |
213 | return Error::success(); |
214 | } |
215 | |
216 | const uint64_t LPOffset = LandingPad - getAddress(); |
217 | if (!getInstructionAtOffset(Offset: LPOffset)) { |
218 | if (opts::Verbosity >= 1) |
219 | BC.errs() << "BOLT-WARNING: landing pad " |
220 | << Twine::utohexstr(Val: LPOffset) |
221 | << " not pointing to an instruction in function " << *this |
222 | << " - ignoring.\n" ; |
223 | } else { |
224 | auto Label = Labels.find(x: LPOffset); |
225 | if (Label != Labels.end()) { |
226 | LPSymbol = Label->second; |
227 | } else { |
228 | LPSymbol = BC.Ctx->createNamedTempSymbol(Name: "LP" ); |
229 | Labels[LPOffset] = LPSymbol; |
230 | } |
231 | } |
232 | } |
233 | |
234 | // Mark all call instructions in the range. |
235 | auto II = Instructions.find(x: Start); |
236 | auto IE = Instructions.end(); |
237 | assert(II != IE && "exception range not pointing to an instruction" ); |
238 | do { |
239 | MCInst &Instruction = II->second; |
240 | if (BC.MIB->isCall(Inst: Instruction) && |
241 | !BC.MIB->getConditionalTailCall(Inst: Instruction)) { |
242 | assert(!BC.MIB->isInvoke(Instruction) && |
243 | "overlapping exception ranges detected" ); |
244 | // Add extra operands to a call instruction making it an invoke from |
245 | // now on. |
246 | BC.MIB->addEHInfo(Inst&: Instruction, |
247 | LP: MCPlus::MCLandingPad(LPSymbol, ActionEntry)); |
248 | } |
249 | ++II; |
250 | } while (II != IE && II->first < Start + Length); |
251 | |
252 | if (ActionEntry != 0) { |
253 | auto printType = [&](int Index, raw_ostream &OS) { |
254 | assert(Index > 0 && "only positive indices are valid" ); |
255 | uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; |
256 | const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; |
257 | uint64_t TypeAddress = |
258 | *Data.getEncodedPointer(Offset: &TTEntry, Encoding: TTypeEncoding, PCRelOffset: TTEntryAddress); |
259 | if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) |
260 | TypeAddress = 0; |
261 | if (TypeAddress == 0) { |
262 | OS << "<all>" ; |
263 | return; |
264 | } |
265 | if (TTypeEncoding & DW_EH_PE_indirect) { |
266 | ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(Address: TypeAddress); |
267 | assert(PointerOrErr && "failed to decode indirect address" ); |
268 | TypeAddress = *PointerOrErr; |
269 | } |
270 | if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(Address: TypeAddress)) |
271 | OS << TypeSymBD->getName(); |
272 | else |
273 | OS << "0x" << Twine::utohexstr(Val: TypeAddress); |
274 | }; |
275 | if (opts::PrintExceptions) |
276 | BC.outs() << " actions: " ; |
277 | uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; |
278 | int64_t ActionType; |
279 | int64_t ActionNext; |
280 | const char *Sep = "" ; |
281 | do { |
282 | ActionType = Data.getSLEB128(OffsetPtr: &ActionPtr); |
283 | const uint32_t Self = ActionPtr; |
284 | ActionNext = Data.getSLEB128(OffsetPtr: &ActionPtr); |
285 | if (opts::PrintExceptions) |
286 | BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") " ; |
287 | if (ActionType == 0) { |
288 | if (opts::PrintExceptions) |
289 | BC.outs() << "cleanup" ; |
290 | } else if (ActionType > 0) { |
291 | // It's an index into a type table. |
292 | MaxTypeIndex = |
293 | std::max(a: MaxTypeIndex, b: static_cast<unsigned>(ActionType)); |
294 | if (opts::PrintExceptions) { |
295 | BC.outs() << "catch type " ; |
296 | printType(ActionType, BC.outs()); |
297 | } |
298 | } else { // ActionType < 0 |
299 | if (opts::PrintExceptions) |
300 | BC.outs() << "filter exception types " ; |
301 | const char *TSep = "" ; |
302 | // ActionType is a negative *byte* offset into *uleb128-encoded* table |
303 | // of indices with base 1. |
304 | // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are |
305 | // encoded using uleb128 thus we cannot directly dereference them. |
306 | uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; |
307 | while (uint64_t Index = Data.getULEB128(offset_ptr: &TypeIndexTablePtr)) { |
308 | MaxTypeIndex = std::max(a: MaxTypeIndex, b: static_cast<unsigned>(Index)); |
309 | if (opts::PrintExceptions) { |
310 | BC.outs() << TSep; |
311 | printType(Index, BC.outs()); |
312 | TSep = ", " ; |
313 | } |
314 | } |
315 | MaxTypeIndexTableOffset = std::max( |
316 | a: MaxTypeIndexTableOffset, b: TypeIndexTablePtr - TypeIndexTableStart); |
317 | } |
318 | |
319 | Sep = "; " ; |
320 | |
321 | ActionPtr = Self + ActionNext; |
322 | } while (ActionNext); |
323 | if (opts::PrintExceptions) |
324 | BC.outs() << '\n'; |
325 | } |
326 | } |
327 | if (opts::PrintExceptions) |
328 | BC.outs() << '\n'; |
329 | |
330 | assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= |
331 | Data.getData().size() && |
332 | "LSDA entry has crossed section boundary" ); |
333 | |
334 | if (TTypeEnd) { |
335 | LSDAActionTable = LSDASectionData.slice( |
336 | N: ActionTableStart, M: TypeIndexTableStart - |
337 | MaxTypeIndex * TTypeEncodingSize - |
338 | ActionTableStart); |
339 | for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { |
340 | uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; |
341 | const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; |
342 | uint64_t TypeAddress = |
343 | *Data.getEncodedPointer(Offset: &TTEntry, Encoding: TTypeEncoding, PCRelOffset: TTEntryAddress); |
344 | if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) |
345 | TypeAddress = 0; |
346 | if (TTypeEncoding & DW_EH_PE_indirect) { |
347 | LSDATypeAddressTable.emplace_back(Args&: TypeAddress); |
348 | if (TypeAddress) { |
349 | ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(Address: TypeAddress); |
350 | assert(PointerOrErr && "failed to decode indirect address" ); |
351 | TypeAddress = *PointerOrErr; |
352 | } |
353 | } |
354 | LSDATypeTable.emplace_back(Args&: TypeAddress); |
355 | } |
356 | LSDATypeIndexTable = |
357 | LSDASectionData.slice(N: TypeIndexTableStart, M: MaxTypeIndexTableOffset); |
358 | } |
359 | return Error::success(); |
360 | } |
361 | |
362 | void BinaryFunction::updateEHRanges() { |
363 | if (getSize() == 0) |
364 | return; |
365 | |
366 | assert(CurrentState == State::CFG_Finalized && "unexpected state" ); |
367 | |
368 | // Build call sites table. |
369 | struct EHInfo { |
370 | const MCSymbol *LP; // landing pad |
371 | uint64_t Action; |
372 | }; |
373 | |
374 | // Sites to update. |
375 | CallSitesList Sites; |
376 | |
377 | for (FunctionFragment &FF : getLayout().fragments()) { |
378 | // If previous call can throw, this is its exception handler. |
379 | EHInfo PreviousEH = {.LP: nullptr, .Action: 0}; |
380 | |
381 | // Marker for the beginning of exceptions range. |
382 | const MCSymbol *StartRange = nullptr; |
383 | |
384 | for (BinaryBasicBlock *const BB : FF) { |
385 | for (MCInst &Instr : *BB) { |
386 | if (!BC.MIB->isCall(Inst: Instr)) |
387 | continue; |
388 | |
389 | // Instruction can throw an exception that should be handled. |
390 | const bool Throws = BC.MIB->isInvoke(Inst: Instr); |
391 | |
392 | // Ignore the call if it's a continuation of a no-throw gap. |
393 | if (!Throws && !StartRange) |
394 | continue; |
395 | |
396 | // Extract exception handling information from the instruction. |
397 | const MCSymbol *LP = nullptr; |
398 | uint64_t Action = 0; |
399 | if (const std::optional<MCPlus::MCLandingPad> EHInfo = |
400 | BC.MIB->getEHInfo(Inst: Instr)) |
401 | std::tie(args&: LP, args&: Action) = *EHInfo; |
402 | |
403 | // No action if the exception handler has not changed. |
404 | if (Throws && StartRange && PreviousEH.LP == LP && |
405 | PreviousEH.Action == Action) |
406 | continue; |
407 | |
408 | // Same symbol is used for the beginning and the end of the range. |
409 | MCSymbol *EHSymbol; |
410 | if (MCSymbol *InstrLabel = BC.MIB->getInstLabel(Inst: Instr)) { |
411 | EHSymbol = InstrLabel; |
412 | } else { |
413 | std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex); |
414 | EHSymbol = BC.MIB->getOrCreateInstLabel(Inst&: Instr, Name: "EH" , Ctx: BC.Ctx.get()); |
415 | } |
416 | |
417 | // At this point we could be in one of the following states: |
418 | // |
419 | // I. Exception handler has changed and we need to close previous range |
420 | // and start a new one. |
421 | // |
422 | // II. Start a new exception range after the gap. |
423 | // |
424 | // III. Close current exception range and start a new gap. |
425 | const MCSymbol *EndRange; |
426 | if (StartRange) { |
427 | // I, III: |
428 | EndRange = EHSymbol; |
429 | } else { |
430 | // II: |
431 | StartRange = EHSymbol; |
432 | EndRange = nullptr; |
433 | } |
434 | |
435 | // Close the previous range. |
436 | if (EndRange) |
437 | Sites.emplace_back( |
438 | Args: FF.getFragmentNum(), |
439 | Args: CallSite{.Start: StartRange, .End: EndRange, .LP: PreviousEH.LP, .Action: PreviousEH.Action}); |
440 | |
441 | if (Throws) { |
442 | // I, II: |
443 | StartRange = EHSymbol; |
444 | PreviousEH = EHInfo{.LP: LP, .Action: Action}; |
445 | } else { |
446 | StartRange = nullptr; |
447 | } |
448 | } |
449 | } |
450 | |
451 | // Check if we need to close the range. |
452 | if (StartRange) { |
453 | const MCSymbol *EndRange = getFunctionEndLabel(Fragment: FF.getFragmentNum()); |
454 | Sites.emplace_back( |
455 | Args: FF.getFragmentNum(), |
456 | Args: CallSite{.Start: StartRange, .End: EndRange, .LP: PreviousEH.LP, .Action: PreviousEH.Action}); |
457 | } |
458 | } |
459 | |
460 | addCallSites(NewCallSites: Sites); |
461 | } |
462 | |
// Mask selecting the two high bits of a CFI opcode byte. fillCFIInfoFor()
// uses it to reduce an opcode with any of those bits set to its primary
// opcode.
constexpr uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
464 | |
465 | CFIReaderWriter::CFIReaderWriter(BinaryContext &BC, |
466 | const DWARFDebugFrame &EHFrame) |
467 | : BC(BC) { |
468 | // Prepare FDEs for fast lookup |
469 | for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { |
470 | const auto *CurFDE = dyn_cast<dwarf::FDE>(Val: &Entry); |
471 | // Skip CIEs. |
472 | if (!CurFDE) |
473 | continue; |
474 | // There could me multiple FDEs with the same initial address, and perhaps |
475 | // different sizes (address ranges). Use the first entry with non-zero size. |
476 | auto FDEI = FDEs.lower_bound(x: CurFDE->getInitialLocation()); |
477 | if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { |
478 | if (CurFDE->getAddressRange()) { |
479 | if (FDEI->second->getAddressRange() == 0) { |
480 | FDEI->second = CurFDE; |
481 | } else if (opts::Verbosity > 0) { |
482 | BC.errs() << "BOLT-WARNING: different FDEs for function at 0x" |
483 | << Twine::utohexstr(Val: FDEI->first) |
484 | << " detected; sizes: " << FDEI->second->getAddressRange() |
485 | << " and " << CurFDE->getAddressRange() << '\n'; |
486 | } |
487 | } |
488 | } else { |
489 | FDEs.emplace_hint(pos: FDEI, args: CurFDE->getInitialLocation(), args&: CurFDE); |
490 | } |
491 | } |
492 | } |
493 | |
494 | bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { |
495 | uint64_t Address = Function.getAddress(); |
496 | auto I = FDEs.find(x: Address); |
497 | // Ignore zero-length FDE ranges. |
498 | if (I == FDEs.end() || !I->second->getAddressRange()) |
499 | return true; |
500 | |
501 | const FDE &CurFDE = *I->second; |
502 | std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); |
503 | Function.setLSDAAddress(LSDA ? *LSDA : 0); |
504 | |
505 | uint64_t Offset = Function.getFirstInstructionOffset(); |
506 | uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); |
507 | uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); |
508 | if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { |
509 | Function.setPersonalityFunction( |
510 | *CurFDE.getLinkedCIE()->getPersonalityAddress()); |
511 | Function.setPersonalityEncoding( |
512 | *CurFDE.getLinkedCIE()->getPersonalityEncoding()); |
513 | } |
514 | |
515 | auto decodeFrameInstruction = [this, &Function, &Offset, Address, |
516 | CodeAlignment, DataAlignment]( |
517 | const CFIProgram::Instruction &Instr) { |
518 | uint8_t Opcode = Instr.Opcode; |
519 | if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) |
520 | Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; |
521 | switch (Instr.Opcode) { |
522 | case DW_CFA_nop: |
523 | break; |
524 | case DW_CFA_advance_loc4: |
525 | case DW_CFA_advance_loc2: |
526 | case DW_CFA_advance_loc1: |
527 | case DW_CFA_advance_loc: |
528 | // Advance our current address |
529 | Offset += CodeAlignment * int64_t(Instr.Ops[0]); |
530 | break; |
531 | case DW_CFA_offset_extended_sf: |
532 | Function.addCFIInstruction( |
533 | Offset, |
534 | Inst: MCCFIInstruction::createOffset( |
535 | L: nullptr, Register: Instr.Ops[0], Offset: DataAlignment * int64_t(Instr.Ops[1]))); |
536 | break; |
537 | case DW_CFA_offset_extended: |
538 | case DW_CFA_offset: |
539 | Function.addCFIInstruction( |
540 | Offset, Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Instr.Ops[0], |
541 | Offset: DataAlignment * Instr.Ops[1])); |
542 | break; |
543 | case DW_CFA_restore_extended: |
544 | case DW_CFA_restore: |
545 | Function.addCFIInstruction( |
546 | Offset, Inst: MCCFIInstruction::createRestore(L: nullptr, Register: Instr.Ops[0])); |
547 | break; |
548 | case DW_CFA_set_loc: |
549 | assert(Instr.Ops[0] >= Address && "set_loc out of function bounds" ); |
550 | assert(Instr.Ops[0] <= Address + Function.getSize() && |
551 | "set_loc out of function bounds" ); |
552 | Offset = Instr.Ops[0] - Address; |
553 | break; |
554 | |
555 | case DW_CFA_undefined: |
556 | Function.addCFIInstruction( |
557 | Offset, Inst: MCCFIInstruction::createUndefined(L: nullptr, Register: Instr.Ops[0])); |
558 | break; |
559 | case DW_CFA_same_value: |
560 | Function.addCFIInstruction( |
561 | Offset, Inst: MCCFIInstruction::createSameValue(L: nullptr, Register: Instr.Ops[0])); |
562 | break; |
563 | case DW_CFA_register: |
564 | Function.addCFIInstruction( |
565 | Offset, Inst: MCCFIInstruction::createRegister(L: nullptr, Register1: Instr.Ops[0], |
566 | Register2: Instr.Ops[1])); |
567 | break; |
568 | case DW_CFA_remember_state: |
569 | Function.addCFIInstruction( |
570 | Offset, Inst: MCCFIInstruction::createRememberState(L: nullptr)); |
571 | break; |
572 | case DW_CFA_restore_state: |
573 | Function.addCFIInstruction(Offset, |
574 | Inst: MCCFIInstruction::createRestoreState(L: nullptr)); |
575 | break; |
576 | case DW_CFA_def_cfa: |
577 | Function.addCFIInstruction( |
578 | Offset, |
579 | Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: Instr.Ops[0], Offset: Instr.Ops[1])); |
580 | break; |
581 | case DW_CFA_def_cfa_sf: |
582 | Function.addCFIInstruction( |
583 | Offset, |
584 | Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: Instr.Ops[0], |
585 | Offset: DataAlignment * int64_t(Instr.Ops[1]))); |
586 | break; |
587 | case DW_CFA_def_cfa_register: |
588 | Function.addCFIInstruction(Offset, Inst: MCCFIInstruction::createDefCfaRegister( |
589 | L: nullptr, Register: Instr.Ops[0])); |
590 | break; |
591 | case DW_CFA_def_cfa_offset: |
592 | Function.addCFIInstruction( |
593 | Offset, Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: Instr.Ops[0])); |
594 | break; |
595 | case DW_CFA_def_cfa_offset_sf: |
596 | Function.addCFIInstruction( |
597 | Offset, Inst: MCCFIInstruction::cfiDefCfaOffset( |
598 | L: nullptr, Offset: DataAlignment * int64_t(Instr.Ops[0]))); |
599 | break; |
600 | case DW_CFA_GNU_args_size: |
601 | Function.addCFIInstruction( |
602 | Offset, Inst: MCCFIInstruction::createGnuArgsSize(L: nullptr, Size: Instr.Ops[0])); |
603 | Function.setUsesGnuArgsSize(); |
604 | break; |
605 | case DW_CFA_val_offset_sf: |
606 | case DW_CFA_val_offset: |
607 | if (opts::Verbosity >= 1) { |
608 | BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n" ; |
609 | } |
610 | return false; |
611 | case DW_CFA_def_cfa_expression: |
612 | case DW_CFA_val_expression: |
613 | case DW_CFA_expression: { |
614 | StringRef ExprBytes = Instr.Expression->getData(); |
615 | std::string Str; |
616 | raw_string_ostream OS(Str); |
617 | // Manually encode this instruction using CFI escape |
618 | OS << Opcode; |
619 | if (Opcode != DW_CFA_def_cfa_expression) |
620 | encodeULEB128(Value: Instr.Ops[0], OS); |
621 | encodeULEB128(Value: ExprBytes.size(), OS); |
622 | OS << ExprBytes; |
623 | Function.addCFIInstruction( |
624 | Offset, Inst: MCCFIInstruction::createEscape(L: nullptr, Vals: OS.str())); |
625 | break; |
626 | } |
627 | case DW_CFA_MIPS_advance_loc8: |
628 | if (opts::Verbosity >= 1) |
629 | BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n" ; |
630 | return false; |
631 | case DW_CFA_GNU_window_save: |
632 | // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same |
633 | // id but mean different things. The latter is used in AArch64. |
634 | if (Function.getBinaryContext().isAArch64()) { |
635 | Function.addCFIInstruction( |
636 | Offset, Inst: MCCFIInstruction::createNegateRAState(L: nullptr)); |
637 | break; |
638 | } |
639 | if (opts::Verbosity >= 1) |
640 | BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n" ; |
641 | return false; |
642 | case DW_CFA_lo_user: |
643 | case DW_CFA_hi_user: |
644 | if (opts::Verbosity >= 1) |
645 | BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n" ; |
646 | return false; |
647 | default: |
648 | if (opts::Verbosity >= 1) |
649 | BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: " |
650 | << Instr.Opcode << '\n'; |
651 | return false; |
652 | } |
653 | |
654 | return true; |
655 | }; |
656 | |
657 | for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) |
658 | if (!decodeFrameInstruction(Instr)) |
659 | return false; |
660 | |
661 | for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) |
662 | if (!decodeFrameInstruction(Instr)) |
663 | return false; |
664 | |
665 | return true; |
666 | } |
667 | |
668 | std::vector<char> |
669 | CFIReaderWriter::(const DWARFDebugFrame &OldEHFrame, |
670 | const DWARFDebugFrame &NewEHFrame, |
671 | uint64_t ) const { |
672 | // Common PC -> FDE map to be written into .eh_frame_hdr. |
673 | std::map<uint64_t, uint64_t> PCToFDE; |
674 | |
675 | // Initialize PCToFDE using NewEHFrame. |
676 | for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { |
677 | const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(Val: &Entry); |
678 | if (FDE == nullptr) |
679 | continue; |
680 | const uint64_t FuncAddress = FDE->getInitialLocation(); |
681 | const uint64_t FDEAddress = |
682 | NewEHFrame.getEHFrameAddress() + FDE->getOffset(); |
683 | |
684 | // Ignore unused FDEs. |
685 | if (FuncAddress == 0) |
686 | continue; |
687 | |
688 | // Add the address to the map unless we failed to write it. |
689 | PCToFDE[FuncAddress] = FDEAddress; |
690 | }; |
691 | |
692 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " |
693 | << llvm::size(NewEHFrame.entries()) << " entries\n" ); |
694 | |
695 | // Add entries from the original .eh_frame corresponding to the functions |
696 | // that we did not update. |
697 | for (const dwarf::FrameEntry &Entry : OldEHFrame) { |
698 | const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(Val: &Entry); |
699 | if (FDE == nullptr) |
700 | continue; |
701 | const uint64_t FuncAddress = FDE->getInitialLocation(); |
702 | const uint64_t FDEAddress = |
703 | OldEHFrame.getEHFrameAddress() + FDE->getOffset(); |
704 | |
705 | // Add the address if we failed to write it. |
706 | if (PCToFDE.count(x: FuncAddress) == 0) { |
707 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" |
708 | << Twine::utohexstr(FuncAddress) << " is at 0x" |
709 | << Twine::utohexstr(FDEAddress) << '\n'); |
710 | PCToFDE[FuncAddress] = FDEAddress; |
711 | } |
712 | }; |
713 | |
714 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " |
715 | << llvm::size(OldEHFrame.entries()) << " entries\n" ); |
716 | |
717 | // Generate a new .eh_frame_hdr based on the new map. |
718 | |
719 | // Header plus table of entries of size 8 bytes. |
720 | std::vector<char> (12 + PCToFDE.size() * 8); |
721 | |
722 | // Version is 1. |
723 | EHFrameHeader[0] = 1; |
724 | // Encoding of the eh_frame pointer. |
725 | EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; |
726 | // Encoding of the count field to follow. |
727 | EHFrameHeader[2] = DW_EH_PE_udata4; |
728 | // Encoding of the table entries - 4-byte offset from the start of the header. |
729 | EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; |
730 | |
731 | // Address of eh_frame. Use the new one. |
732 | support::ulittle32_t::ref(EHFrameHeader.data() + 4) = |
733 | NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); |
734 | |
735 | // Number of entries in the table (FDE count). |
736 | support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); |
737 | |
738 | // Write the table at offset 12. |
739 | char *Ptr = EHFrameHeader.data(); |
740 | uint32_t Offset = 12; |
741 | for (const auto &PCI : PCToFDE) { |
742 | int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; |
743 | assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds" ); |
744 | support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; |
745 | Offset += 4; |
746 | int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; |
747 | assert(isInt<32>(FDEOffset) && "FDE offset out of bounds" ); |
748 | support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; |
749 | Offset += 4; |
750 | } |
751 | |
752 | return EHFrameHeader; |
753 | } |
754 | |
755 | Error EHFrameParser::parseCIE(uint64_t StartOffset) { |
756 | uint8_t Version = Data.getU8(offset_ptr: &Offset); |
757 | const char *Augmentation = Data.getCStr(OffsetPtr: &Offset); |
758 | StringRef AugmentationString(Augmentation ? Augmentation : "" ); |
759 | uint8_t AddressSize = |
760 | Version < 4 ? Data.getAddressSize() : Data.getU8(offset_ptr: &Offset); |
761 | Data.setAddressSize(AddressSize); |
762 | // Skip segment descriptor size |
763 | if (Version >= 4) |
764 | Offset += 1; |
765 | // Skip code alignment factor |
766 | Data.getULEB128(offset_ptr: &Offset); |
767 | // Skip data alignment |
768 | Data.getSLEB128(OffsetPtr: &Offset); |
769 | // Skip return address register |
770 | if (Version == 1) |
771 | Offset += 1; |
772 | else |
773 | Data.getULEB128(offset_ptr: &Offset); |
774 | |
775 | uint32_t FDEPointerEncoding = DW_EH_PE_absptr; |
776 | uint32_t LSDAPointerEncoding = DW_EH_PE_omit; |
777 | // Walk the augmentation string to get all the augmentation data. |
778 | for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { |
779 | switch (AugmentationString[i]) { |
780 | default: |
781 | return createStringError( |
782 | EC: errc::invalid_argument, |
783 | Fmt: "unknown augmentation character in entry at 0x%" PRIx64, Vals: StartOffset); |
784 | case 'L': |
785 | LSDAPointerEncoding = Data.getU8(offset_ptr: &Offset); |
786 | break; |
787 | case 'P': { |
788 | uint32_t PersonalityEncoding = Data.getU8(offset_ptr: &Offset); |
789 | std::optional<uint64_t> Personality = |
790 | Data.getEncodedPointer(Offset: &Offset, Encoding: PersonalityEncoding, |
791 | PCRelOffset: EHFrameAddress ? EHFrameAddress + Offset : 0); |
792 | // Patch personality address |
793 | if (Personality) |
794 | PatcherCallback(*Personality, Offset, PersonalityEncoding); |
795 | break; |
796 | } |
797 | case 'R': |
798 | FDEPointerEncoding = Data.getU8(offset_ptr: &Offset); |
799 | break; |
800 | case 'z': |
801 | if (i) |
802 | return createStringError( |
803 | EC: errc::invalid_argument, |
804 | Fmt: "'z' must be the first character at 0x%" PRIx64, Vals: StartOffset); |
805 | // Skip augmentation length |
806 | Data.getULEB128(offset_ptr: &Offset); |
807 | break; |
808 | case 'S': |
809 | case 'B': |
810 | break; |
811 | } |
812 | } |
813 | Entries.emplace_back(args: std::make_unique<CIEInfo>( |
814 | args&: FDEPointerEncoding, args&: LSDAPointerEncoding, args&: AugmentationString)); |
815 | CIEs[StartOffset] = &*Entries.back(); |
816 | return Error::success(); |
817 | } |
818 | |
819 | Error EHFrameParser::parseFDE(uint64_t CIEPointer, |
820 | uint64_t StartStructureOffset) { |
821 | std::optional<uint64_t> LSDAAddress; |
822 | CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; |
823 | |
824 | // The address size is encoded in the CIE we reference. |
825 | if (!Cie) |
826 | return createStringError(EC: errc::invalid_argument, |
827 | Fmt: "parsing FDE data at 0x%" PRIx64 |
828 | " failed due to missing CIE" , |
829 | Vals: StartStructureOffset); |
830 | // Patch initial location |
831 | if (auto Val = Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->FDEPtrEncoding, |
832 | PCRelOffset: EHFrameAddress + Offset)) { |
833 | PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); |
834 | } |
835 | // Skip address range |
836 | Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->FDEPtrEncoding, PCRelOffset: 0); |
837 | |
838 | // Process augmentation data for this FDE. |
839 | StringRef AugmentationString = Cie->AugmentationString; |
840 | if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { |
841 | // Skip augmentation length |
842 | Data.getULEB128(offset_ptr: &Offset); |
843 | LSDAAddress = |
844 | Data.getEncodedPointer(Offset: &Offset, Encoding: Cie->LSDAPtrEncoding, |
845 | PCRelOffset: EHFrameAddress ? Offset + EHFrameAddress : 0); |
846 | // Patch LSDA address |
847 | PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); |
848 | } |
849 | return Error::success(); |
850 | } |
851 | |
852 | Error EHFrameParser::parse() { |
853 | while (Data.isValidOffset(offset: Offset)) { |
854 | const uint64_t StartOffset = Offset; |
855 | |
856 | uint64_t Length; |
857 | DwarfFormat Format; |
858 | std::tie(args&: Length, args&: Format) = Data.getInitialLength(Off: &Offset); |
859 | |
860 | // If the Length is 0, then this CIE is a terminator |
861 | if (Length == 0) |
862 | break; |
863 | |
864 | const uint64_t StartStructureOffset = Offset; |
865 | const uint64_t EndStructureOffset = Offset + Length; |
866 | |
867 | Error Err = Error::success(); |
868 | const uint64_t Id = Data.getRelocatedValue(Size: 4, Off: &Offset, |
869 | /*SectionIndex=*/nullptr, Err: &Err); |
870 | if (Err) |
871 | return Err; |
872 | |
873 | if (!Id) { |
874 | if (Error Err = parseCIE(StartOffset)) |
875 | return Err; |
876 | } else { |
877 | if (Error Err = parseFDE(CIEPointer: Id, StartStructureOffset)) |
878 | return Err; |
879 | } |
880 | Offset = EndStructureOffset; |
881 | } |
882 | |
883 | return Error::success(); |
884 | } |
885 | |
886 | Error EHFrameParser::(DWARFDataExtractor Data, uint64_t EHFrameAddress, |
887 | PatcherCallbackTy PatcherCallback) { |
888 | EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); |
889 | return Parser.parse(); |
890 | } |
891 | |
892 | } // namespace bolt |
893 | } // namespace llvm |
894 | |