1//===-- ObjectFileMachO.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/ScopeExit.h"
10#include "llvm/ADT/StringRef.h"
11
12#include "Plugins/Process/Utility/RegisterContextDarwin_arm.h"
13#include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h"
14#include "Plugins/Process/Utility/RegisterContextDarwin_riscv32.h"
15#include "Plugins/Process/Utility/RegisterContextDarwin_x86_64.h"
16#include "lldb/Core/Debugger.h"
17#include "lldb/Core/Module.h"
18#include "lldb/Core/ModuleSpec.h"
19#include "lldb/Core/PluginManager.h"
20#include "lldb/Core/Progress.h"
21#include "lldb/Core/Section.h"
22#include "lldb/Host/Host.h"
23#include "lldb/Symbol/DWARFCallFrameInfo.h"
24#include "lldb/Symbol/ObjectFile.h"
25#include "lldb/Target/DynamicLoader.h"
26#include "lldb/Target/MemoryRegionInfo.h"
27#include "lldb/Target/Platform.h"
28#include "lldb/Target/Process.h"
29#include "lldb/Target/SectionLoadList.h"
30#include "lldb/Target/Target.h"
31#include "lldb/Target/Thread.h"
32#include "lldb/Target/ThreadList.h"
33#include "lldb/Utility/ArchSpec.h"
34#include "lldb/Utility/DataBuffer.h"
35#include "lldb/Utility/FileSpec.h"
36#include "lldb/Utility/FileSpecList.h"
37#include "lldb/Utility/LLDBLog.h"
38#include "lldb/Utility/Log.h"
39#include "lldb/Utility/RangeMap.h"
40#include "lldb/Utility/RegisterValue.h"
41#include "lldb/Utility/Status.h"
42#include "lldb/Utility/StreamString.h"
43#include "lldb/Utility/Timer.h"
44#include "lldb/Utility/UUID.h"
45
46#include "lldb/Host/SafeMachO.h"
47
48#include "llvm/ADT/DenseSet.h"
49#include "llvm/Support/FormatVariadic.h"
50#include "llvm/Support/MemoryBuffer.h"
51
52#include "ObjectFileMachO.h"
53
54#if defined(__APPLE__)
55#include <TargetConditionals.h>
56// GetLLDBSharedCacheUUID() needs to call dlsym()
57#include <dlfcn.h>
58#include <mach/mach_init.h>
59#include <mach/vm_map.h>
60#include <lldb/Host/SafeMachO.h>
61#endif
62
63#ifndef __APPLE__
64#include "lldb/Utility/AppleUuidCompatibility.h"
65#else
66#include <uuid/uuid.h>
67#endif
68
69#include <bitset>
70#include <memory>
71#include <optional>
72
73// Unfortunately the signpost header pulls in the system MachO header, too.
74#ifdef CPU_TYPE_ARM
75#undef CPU_TYPE_ARM
76#endif
77#ifdef CPU_TYPE_ARM64
78#undef CPU_TYPE_ARM64
79#endif
80#ifdef CPU_TYPE_ARM64_32
81#undef CPU_TYPE_ARM64_32
82#endif
83#ifdef CPU_TYPE_X86_64
84#undef CPU_TYPE_X86_64
85#endif
86#ifdef MH_DYLINKER
87#undef MH_DYLINKER
88#endif
89#ifdef MH_OBJECT
90#undef MH_OBJECT
91#endif
92#ifdef LC_VERSION_MIN_MACOSX
93#undef LC_VERSION_MIN_MACOSX
94#endif
95#ifdef LC_VERSION_MIN_IPHONEOS
96#undef LC_VERSION_MIN_IPHONEOS
97#endif
98#ifdef LC_VERSION_MIN_TVOS
99#undef LC_VERSION_MIN_TVOS
100#endif
101#ifdef LC_VERSION_MIN_WATCHOS
102#undef LC_VERSION_MIN_WATCHOS
103#endif
104#ifdef LC_BUILD_VERSION
105#undef LC_BUILD_VERSION
106#endif
107#ifdef PLATFORM_MACOS
108#undef PLATFORM_MACOS
109#endif
110#ifdef PLATFORM_MACCATALYST
111#undef PLATFORM_MACCATALYST
112#endif
113#ifdef PLATFORM_IOS
114#undef PLATFORM_IOS
115#endif
116#ifdef PLATFORM_IOSSIMULATOR
117#undef PLATFORM_IOSSIMULATOR
118#endif
119#ifdef PLATFORM_TVOS
120#undef PLATFORM_TVOS
121#endif
122#ifdef PLATFORM_TVOSSIMULATOR
123#undef PLATFORM_TVOSSIMULATOR
124#endif
125#ifdef PLATFORM_WATCHOS
126#undef PLATFORM_WATCHOS
127#endif
128#ifdef PLATFORM_WATCHOSSIMULATOR
129#undef PLATFORM_WATCHOSSIMULATOR
130#endif
131
132#define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
133using namespace lldb;
134using namespace lldb_private;
135using namespace llvm::MachO;
136
137static constexpr llvm::StringLiteral g_loader_path = "@loader_path";
138static constexpr llvm::StringLiteral g_executable_path = "@executable_path";
139
140LLDB_PLUGIN_DEFINE(ObjectFileMachO)
141
142static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
143 const char *alt_name, size_t reg_byte_size,
144 Stream &data) {
145 const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(reg_name: name);
146 if (reg_info == nullptr)
147 reg_info = reg_ctx->GetRegisterInfoByName(reg_name: alt_name);
148 if (reg_info) {
149 lldb_private::RegisterValue reg_value;
150 if (reg_ctx->ReadRegister(reg_info, reg_value)) {
151 if (reg_info->byte_size >= reg_byte_size)
152 data.Write(src: reg_value.GetBytes(), src_len: reg_byte_size);
153 else {
154 data.Write(src: reg_value.GetBytes(), src_len: reg_info->byte_size);
155 for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
156 data.PutChar(ch: 0);
157 }
158 return;
159 }
160 }
161 // Just write zeros if all else fails
162 for (size_t i = 0; i < reg_byte_size; ++i)
163 data.PutChar(ch: 0);
164}
165
166class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 {
167public:
168 RegisterContextDarwin_x86_64_Mach(lldb_private::Thread &thread,
169 const DataExtractor &data)
170 : RegisterContextDarwin_x86_64(thread, 0) {
171 SetRegisterDataFrom_LC_THREAD(data);
172 }
173
174 void InvalidateAllRegisters() override {
175 // Do nothing... registers are always valid...
176 }
177
178 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
179 lldb::offset_t offset = 0;
180 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
181 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
182 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
183
184 while (offset < data.GetByteSize()) {
185 int flavor = data.GetU32(offset_ptr: &offset);
186 if (flavor == 0)
187 break;
188 uint32_t count = data.GetU32(offset_ptr: &offset);
189 switch (flavor) {
190 case GPRRegSet: {
191 uint32_t *gpr_data = reinterpret_cast<uint32_t *>(&gpr.rax);
192 for (uint32_t i = 0; i < count && offset < data.GetByteSize(); ++i)
193 gpr_data[i] = data.GetU32(offset_ptr: &offset);
194 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
195 } break;
196 case FPURegSet:
197 // TODO: fill in FPU regs....
198 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
199 break;
200 case EXCRegSet:
201 exc.trapno = data.GetU32(offset_ptr: &offset);
202 exc.err = data.GetU32(offset_ptr: &offset);
203 exc.faultvaddr = data.GetU64(offset_ptr: &offset);
204 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
205 break;
206 default:
207 offset += count * 4;
208 break;
209 }
210 }
211 }
212
213 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
214 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
215 if (reg_ctx_sp) {
216 RegisterContext *reg_ctx = reg_ctx_sp.get();
217
218 data.PutHex32(uvalue: GPRRegSet); // Flavor
219 data.PutHex32(uvalue: GPRWordCount);
220 PrintRegisterValue(reg_ctx, name: "rax", alt_name: nullptr, reg_byte_size: 8, data);
221 PrintRegisterValue(reg_ctx, name: "rbx", alt_name: nullptr, reg_byte_size: 8, data);
222 PrintRegisterValue(reg_ctx, name: "rcx", alt_name: nullptr, reg_byte_size: 8, data);
223 PrintRegisterValue(reg_ctx, name: "rdx", alt_name: nullptr, reg_byte_size: 8, data);
224 PrintRegisterValue(reg_ctx, name: "rdi", alt_name: nullptr, reg_byte_size: 8, data);
225 PrintRegisterValue(reg_ctx, name: "rsi", alt_name: nullptr, reg_byte_size: 8, data);
226 PrintRegisterValue(reg_ctx, name: "rbp", alt_name: nullptr, reg_byte_size: 8, data);
227 PrintRegisterValue(reg_ctx, name: "rsp", alt_name: nullptr, reg_byte_size: 8, data);
228 PrintRegisterValue(reg_ctx, name: "r8", alt_name: nullptr, reg_byte_size: 8, data);
229 PrintRegisterValue(reg_ctx, name: "r9", alt_name: nullptr, reg_byte_size: 8, data);
230 PrintRegisterValue(reg_ctx, name: "r10", alt_name: nullptr, reg_byte_size: 8, data);
231 PrintRegisterValue(reg_ctx, name: "r11", alt_name: nullptr, reg_byte_size: 8, data);
232 PrintRegisterValue(reg_ctx, name: "r12", alt_name: nullptr, reg_byte_size: 8, data);
233 PrintRegisterValue(reg_ctx, name: "r13", alt_name: nullptr, reg_byte_size: 8, data);
234 PrintRegisterValue(reg_ctx, name: "r14", alt_name: nullptr, reg_byte_size: 8, data);
235 PrintRegisterValue(reg_ctx, name: "r15", alt_name: nullptr, reg_byte_size: 8, data);
236 PrintRegisterValue(reg_ctx, name: "rip", alt_name: nullptr, reg_byte_size: 8, data);
237 PrintRegisterValue(reg_ctx, name: "rflags", alt_name: nullptr, reg_byte_size: 8, data);
238 PrintRegisterValue(reg_ctx, name: "cs", alt_name: nullptr, reg_byte_size: 8, data);
239 PrintRegisterValue(reg_ctx, name: "fs", alt_name: nullptr, reg_byte_size: 8, data);
240 PrintRegisterValue(reg_ctx, name: "gs", alt_name: nullptr, reg_byte_size: 8, data);
241
242 // // Write out the FPU registers
243 // const size_t fpu_byte_size = sizeof(FPU);
244 // size_t bytes_written = 0;
245 // data.PutHex32 (FPURegSet);
246 // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
247 // bytes_written += data.PutHex32(0); // uint32_t pad[0]
248 // bytes_written += data.PutHex32(0); // uint32_t pad[1]
249 // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
250 // data); // uint16_t fcw; // "fctrl"
251 // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
252 // data); // uint16_t fsw; // "fstat"
253 // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
254 // data); // uint8_t ftw; // "ftag"
255 // bytes_written += data.PutHex8 (0); // uint8_t pad1;
256 // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
257 // data); // uint16_t fop; // "fop"
258 // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
259 // data); // uint32_t ip; // "fioff"
260 // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
261 // data); // uint16_t cs; // "fiseg"
262 // bytes_written += data.PutHex16 (0); // uint16_t pad2;
263 // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
264 // data); // uint32_t dp; // "fooff"
265 // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
266 // data); // uint16_t ds; // "foseg"
267 // bytes_written += data.PutHex16 (0); // uint16_t pad3;
268 // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
269 // data); // uint32_t mxcsr;
270 // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
271 // 4, data);// uint32_t mxcsrmask;
272 // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
273 // sizeof(MMSReg), data);
274 // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
275 // sizeof(MMSReg), data);
276 // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
277 // sizeof(MMSReg), data);
278 // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
279 // sizeof(MMSReg), data);
280 // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
281 // sizeof(MMSReg), data);
282 // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
283 // sizeof(MMSReg), data);
284 // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
285 // sizeof(MMSReg), data);
286 // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
287 // sizeof(MMSReg), data);
288 // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
289 // sizeof(XMMReg), data);
290 // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
291 // sizeof(XMMReg), data);
292 // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
293 // sizeof(XMMReg), data);
294 // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
295 // sizeof(XMMReg), data);
296 // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
297 // sizeof(XMMReg), data);
298 // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
299 // sizeof(XMMReg), data);
300 // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
301 // sizeof(XMMReg), data);
302 // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
303 // sizeof(XMMReg), data);
304 // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
305 // sizeof(XMMReg), data);
306 // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
307 // sizeof(XMMReg), data);
308 // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
309 // sizeof(XMMReg), data);
310 // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
311 // sizeof(XMMReg), data);
312 // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
313 // sizeof(XMMReg), data);
314 // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
315 // sizeof(XMMReg), data);
316 // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
317 // sizeof(XMMReg), data);
318 // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
319 // sizeof(XMMReg), data);
320 //
321 // // Fill rest with zeros
322 // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
323 // i)
324 // data.PutChar(0);
325
326 // Write out the EXC registers
327 data.PutHex32(uvalue: EXCRegSet);
328 data.PutHex32(uvalue: EXCWordCount);
329 PrintRegisterValue(reg_ctx, name: "trapno", alt_name: nullptr, reg_byte_size: 4, data);
330 PrintRegisterValue(reg_ctx, name: "err", alt_name: nullptr, reg_byte_size: 4, data);
331 PrintRegisterValue(reg_ctx, name: "faultvaddr", alt_name: nullptr, reg_byte_size: 8, data);
332 return true;
333 }
334 return false;
335 }
336
337protected:
338 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
339
340 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
341
342 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
343
344 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
345 return 0;
346 }
347
348 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
349 return 0;
350 }
351
352 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
353 return 0;
354 }
355};
356
357class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm {
358public:
359 RegisterContextDarwin_arm_Mach(lldb_private::Thread &thread,
360 const DataExtractor &data)
361 : RegisterContextDarwin_arm(thread, 0) {
362 SetRegisterDataFrom_LC_THREAD(data);
363 }
364
365 void InvalidateAllRegisters() override {
366 // Do nothing... registers are always valid...
367 }
368
369 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
370 lldb::offset_t offset = 0;
371 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
372 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
373 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
374
375 while (offset < data.GetByteSize()) {
376 int flavor = data.GetU32(offset_ptr: &offset);
377 uint32_t count = data.GetU32(offset_ptr: &offset);
378 offset_t next_thread_state = offset + (count * 4);
379 switch (flavor) {
380 case GPRAltRegSet:
381 case GPRRegSet: {
382 // r0-r15, plus CPSR
383 uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1;
384 if (count == gpr_buf_count) {
385 for (uint32_t i = 0; i < (count - 1); ++i) {
386 gpr.r[i] = data.GetU32(offset_ptr: &offset);
387 }
388 gpr.cpsr = data.GetU32(offset_ptr: &offset);
389
390 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
391 }
392 } break;
393
394 case FPURegSet: {
395 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats;
396 const int fpu_reg_buf_size = sizeof(fpu.floats);
397 if (data.ExtractBytes(offset, length: fpu_reg_buf_size, dst_byte_order: eByteOrderLittle,
398 dst: fpu_reg_buf) == fpu_reg_buf_size) {
399 offset += fpu_reg_buf_size;
400 fpu.fpscr = data.GetU32(offset_ptr: &offset);
401 SetError(flavor: FPURegSet, err_idx: Read, err: 0);
402 }
403 } break;
404
405 case EXCRegSet:
406 if (count == 3) {
407 exc.exception = data.GetU32(offset_ptr: &offset);
408 exc.fsr = data.GetU32(offset_ptr: &offset);
409 exc.far = data.GetU32(offset_ptr: &offset);
410 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
411 }
412 break;
413 }
414 offset = next_thread_state;
415 }
416 }
417
418 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
419 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
420 if (reg_ctx_sp) {
421 RegisterContext *reg_ctx = reg_ctx_sp.get();
422
423 data.PutHex32(uvalue: GPRRegSet); // Flavor
424 data.PutHex32(uvalue: GPRWordCount);
425 PrintRegisterValue(reg_ctx, name: "r0", alt_name: nullptr, reg_byte_size: 4, data);
426 PrintRegisterValue(reg_ctx, name: "r1", alt_name: nullptr, reg_byte_size: 4, data);
427 PrintRegisterValue(reg_ctx, name: "r2", alt_name: nullptr, reg_byte_size: 4, data);
428 PrintRegisterValue(reg_ctx, name: "r3", alt_name: nullptr, reg_byte_size: 4, data);
429 PrintRegisterValue(reg_ctx, name: "r4", alt_name: nullptr, reg_byte_size: 4, data);
430 PrintRegisterValue(reg_ctx, name: "r5", alt_name: nullptr, reg_byte_size: 4, data);
431 PrintRegisterValue(reg_ctx, name: "r6", alt_name: nullptr, reg_byte_size: 4, data);
432 PrintRegisterValue(reg_ctx, name: "r7", alt_name: nullptr, reg_byte_size: 4, data);
433 PrintRegisterValue(reg_ctx, name: "r8", alt_name: nullptr, reg_byte_size: 4, data);
434 PrintRegisterValue(reg_ctx, name: "r9", alt_name: nullptr, reg_byte_size: 4, data);
435 PrintRegisterValue(reg_ctx, name: "r10", alt_name: nullptr, reg_byte_size: 4, data);
436 PrintRegisterValue(reg_ctx, name: "r11", alt_name: nullptr, reg_byte_size: 4, data);
437 PrintRegisterValue(reg_ctx, name: "r12", alt_name: nullptr, reg_byte_size: 4, data);
438 PrintRegisterValue(reg_ctx, name: "sp", alt_name: nullptr, reg_byte_size: 4, data);
439 PrintRegisterValue(reg_ctx, name: "lr", alt_name: nullptr, reg_byte_size: 4, data);
440 PrintRegisterValue(reg_ctx, name: "pc", alt_name: nullptr, reg_byte_size: 4, data);
441 PrintRegisterValue(reg_ctx, name: "cpsr", alt_name: nullptr, reg_byte_size: 4, data);
442
443 // Write out the EXC registers
444 // data.PutHex32 (EXCRegSet);
445 // data.PutHex32 (EXCWordCount);
446 // WriteRegister (reg_ctx, "exception", NULL, 4, data);
447 // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
448 // WriteRegister (reg_ctx, "far", NULL, 4, data);
449 return true;
450 }
451 return false;
452 }
453
454protected:
455 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
456
457 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
458
459 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
460
461 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
462
463 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
464 return 0;
465 }
466
467 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
468 return 0;
469 }
470
471 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
472 return 0;
473 }
474
475 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
476 return -1;
477 }
478};
479
480class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 {
481public:
482 RegisterContextDarwin_arm64_Mach(lldb_private::Thread &thread,
483 const DataExtractor &data)
484 : RegisterContextDarwin_arm64(thread, 0) {
485 SetRegisterDataFrom_LC_THREAD(data);
486 }
487
488 void InvalidateAllRegisters() override {
489 // Do nothing... registers are always valid...
490 }
491
492 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
493 lldb::offset_t offset = 0;
494 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
495 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
496 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
497 while (offset < data.GetByteSize()) {
498 int flavor = data.GetU32(offset_ptr: &offset);
499 uint32_t count = data.GetU32(offset_ptr: &offset);
500 offset_t next_thread_state = offset + (count * 4);
501 switch (flavor) {
502 case GPRRegSet:
503 // x0-x29 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
504 // 32-bit register)
505 if (count >= (33 * 2) + 1) {
506 for (uint32_t i = 0; i < 29; ++i)
507 gpr.x[i] = data.GetU64(offset_ptr: &offset);
508 gpr.fp = data.GetU64(offset_ptr: &offset);
509 gpr.lr = data.GetU64(offset_ptr: &offset);
510 gpr.sp = data.GetU64(offset_ptr: &offset);
511 gpr.pc = data.GetU64(offset_ptr: &offset);
512 gpr.cpsr = data.GetU32(offset_ptr: &offset);
513 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
514 }
515 break;
516 case FPURegSet: {
517 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
518 const int fpu_reg_buf_size = sizeof(fpu);
519 if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
520 data.ExtractBytes(offset, length: fpu_reg_buf_size, dst_byte_order: eByteOrderLittle,
521 dst: fpu_reg_buf) == fpu_reg_buf_size) {
522 SetError(flavor: FPURegSet, err_idx: Read, err: 0);
523 }
524 } break;
525 case EXCRegSet:
526 if (count == 4) {
527 exc.far = data.GetU64(offset_ptr: &offset);
528 exc.esr = data.GetU32(offset_ptr: &offset);
529 exc.exception = data.GetU32(offset_ptr: &offset);
530 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
531 }
532 break;
533 }
534 offset = next_thread_state;
535 }
536 }
537
538 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
539 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
540 if (reg_ctx_sp) {
541 RegisterContext *reg_ctx = reg_ctx_sp.get();
542
543 data.PutHex32(uvalue: GPRRegSet); // Flavor
544 data.PutHex32(uvalue: GPRWordCount);
545 PrintRegisterValue(reg_ctx, name: "x0", alt_name: nullptr, reg_byte_size: 8, data);
546 PrintRegisterValue(reg_ctx, name: "x1", alt_name: nullptr, reg_byte_size: 8, data);
547 PrintRegisterValue(reg_ctx, name: "x2", alt_name: nullptr, reg_byte_size: 8, data);
548 PrintRegisterValue(reg_ctx, name: "x3", alt_name: nullptr, reg_byte_size: 8, data);
549 PrintRegisterValue(reg_ctx, name: "x4", alt_name: nullptr, reg_byte_size: 8, data);
550 PrintRegisterValue(reg_ctx, name: "x5", alt_name: nullptr, reg_byte_size: 8, data);
551 PrintRegisterValue(reg_ctx, name: "x6", alt_name: nullptr, reg_byte_size: 8, data);
552 PrintRegisterValue(reg_ctx, name: "x7", alt_name: nullptr, reg_byte_size: 8, data);
553 PrintRegisterValue(reg_ctx, name: "x8", alt_name: nullptr, reg_byte_size: 8, data);
554 PrintRegisterValue(reg_ctx, name: "x9", alt_name: nullptr, reg_byte_size: 8, data);
555 PrintRegisterValue(reg_ctx, name: "x10", alt_name: nullptr, reg_byte_size: 8, data);
556 PrintRegisterValue(reg_ctx, name: "x11", alt_name: nullptr, reg_byte_size: 8, data);
557 PrintRegisterValue(reg_ctx, name: "x12", alt_name: nullptr, reg_byte_size: 8, data);
558 PrintRegisterValue(reg_ctx, name: "x13", alt_name: nullptr, reg_byte_size: 8, data);
559 PrintRegisterValue(reg_ctx, name: "x14", alt_name: nullptr, reg_byte_size: 8, data);
560 PrintRegisterValue(reg_ctx, name: "x15", alt_name: nullptr, reg_byte_size: 8, data);
561 PrintRegisterValue(reg_ctx, name: "x16", alt_name: nullptr, reg_byte_size: 8, data);
562 PrintRegisterValue(reg_ctx, name: "x17", alt_name: nullptr, reg_byte_size: 8, data);
563 PrintRegisterValue(reg_ctx, name: "x18", alt_name: nullptr, reg_byte_size: 8, data);
564 PrintRegisterValue(reg_ctx, name: "x19", alt_name: nullptr, reg_byte_size: 8, data);
565 PrintRegisterValue(reg_ctx, name: "x20", alt_name: nullptr, reg_byte_size: 8, data);
566 PrintRegisterValue(reg_ctx, name: "x21", alt_name: nullptr, reg_byte_size: 8, data);
567 PrintRegisterValue(reg_ctx, name: "x22", alt_name: nullptr, reg_byte_size: 8, data);
568 PrintRegisterValue(reg_ctx, name: "x23", alt_name: nullptr, reg_byte_size: 8, data);
569 PrintRegisterValue(reg_ctx, name: "x24", alt_name: nullptr, reg_byte_size: 8, data);
570 PrintRegisterValue(reg_ctx, name: "x25", alt_name: nullptr, reg_byte_size: 8, data);
571 PrintRegisterValue(reg_ctx, name: "x26", alt_name: nullptr, reg_byte_size: 8, data);
572 PrintRegisterValue(reg_ctx, name: "x27", alt_name: nullptr, reg_byte_size: 8, data);
573 PrintRegisterValue(reg_ctx, name: "x28", alt_name: nullptr, reg_byte_size: 8, data);
574 PrintRegisterValue(reg_ctx, name: "fp", alt_name: nullptr, reg_byte_size: 8, data);
575 PrintRegisterValue(reg_ctx, name: "lr", alt_name: nullptr, reg_byte_size: 8, data);
576 PrintRegisterValue(reg_ctx, name: "sp", alt_name: nullptr, reg_byte_size: 8, data);
577 PrintRegisterValue(reg_ctx, name: "pc", alt_name: nullptr, reg_byte_size: 8, data);
578 PrintRegisterValue(reg_ctx, name: "cpsr", alt_name: nullptr, reg_byte_size: 4, data);
579 data.PutHex32(uvalue: 0); // uint32_t pad at the end
580
581 // Write out the EXC registers
582 data.PutHex32(uvalue: EXCRegSet);
583 data.PutHex32(uvalue: EXCWordCount);
584 PrintRegisterValue(reg_ctx, name: "far", alt_name: nullptr, reg_byte_size: 8, data);
585 PrintRegisterValue(reg_ctx, name: "esr", alt_name: nullptr, reg_byte_size: 4, data);
586 PrintRegisterValue(reg_ctx, name: "exception", alt_name: nullptr, reg_byte_size: 4, data);
587 return true;
588 }
589 return false;
590 }
591
592protected:
593 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
594
595 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
596
597 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
598
599 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
600
601 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
602 return 0;
603 }
604
605 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
606 return 0;
607 }
608
609 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
610 return 0;
611 }
612
613 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
614 return -1;
615 }
616};
617
618class RegisterContextDarwin_riscv32_Mach
619 : public RegisterContextDarwin_riscv32 {
620public:
621 RegisterContextDarwin_riscv32_Mach(lldb_private::Thread &thread,
622 const DataExtractor &data)
623 : RegisterContextDarwin_riscv32(thread, 0) {
624 SetRegisterDataFrom_LC_THREAD(data);
625 }
626
627 void InvalidateAllRegisters() override {
628 // Do nothing... registers are always valid...
629 }
630
631 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
632 lldb::offset_t offset = 0;
633 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
634 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
635 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
636 SetError(flavor: CSRRegSet, err_idx: Read, err: -1);
637 while (offset < data.GetByteSize()) {
638 int flavor = data.GetU32(offset_ptr: &offset);
639 uint32_t count = data.GetU32(offset_ptr: &offset);
640 offset_t next_thread_state = offset + (count * 4);
641 switch (flavor) {
642 case GPRRegSet:
643 // x0-x31 + pc
644 if (count >= 32) {
645 for (uint32_t i = 0; i < 32; ++i)
646 ((uint32_t *)&gpr.x0)[i] = data.GetU32(offset_ptr: &offset);
647 gpr.pc = data.GetU32(offset_ptr: &offset);
648 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
649 }
650 break;
651 case FPURegSet: {
652 // f0-f31 + fcsr
653 if (count >= 32) {
654 for (uint32_t i = 0; i < 32; ++i)
655 ((uint32_t *)&fpr.f0)[i] = data.GetU32(offset_ptr: &offset);
656 fpr.fcsr = data.GetU32(offset_ptr: &offset);
657 SetError(flavor: FPURegSet, err_idx: Read, err: 0);
658 }
659 } break;
660 case EXCRegSet:
661 if (count == 3) {
662 exc.exception = data.GetU32(offset_ptr: &offset);
663 exc.fsr = data.GetU32(offset_ptr: &offset);
664 exc.far = data.GetU32(offset_ptr: &offset);
665 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
666 }
667 break;
668 }
669 offset = next_thread_state;
670 }
671 }
672
673 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
674 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
675 if (reg_ctx_sp) {
676 RegisterContext *reg_ctx = reg_ctx_sp.get();
677
678 data.PutHex32(uvalue: GPRRegSet); // Flavor
679 data.PutHex32(uvalue: GPRWordCount);
680 PrintRegisterValue(reg_ctx, name: "x0", alt_name: nullptr, reg_byte_size: 4, data);
681 PrintRegisterValue(reg_ctx, name: "x1", alt_name: nullptr, reg_byte_size: 4, data);
682 PrintRegisterValue(reg_ctx, name: "x2", alt_name: nullptr, reg_byte_size: 4, data);
683 PrintRegisterValue(reg_ctx, name: "x3", alt_name: nullptr, reg_byte_size: 4, data);
684 PrintRegisterValue(reg_ctx, name: "x4", alt_name: nullptr, reg_byte_size: 4, data);
685 PrintRegisterValue(reg_ctx, name: "x5", alt_name: nullptr, reg_byte_size: 4, data);
686 PrintRegisterValue(reg_ctx, name: "x6", alt_name: nullptr, reg_byte_size: 4, data);
687 PrintRegisterValue(reg_ctx, name: "x7", alt_name: nullptr, reg_byte_size: 4, data);
688 PrintRegisterValue(reg_ctx, name: "x8", alt_name: nullptr, reg_byte_size: 4, data);
689 PrintRegisterValue(reg_ctx, name: "x9", alt_name: nullptr, reg_byte_size: 4, data);
690 PrintRegisterValue(reg_ctx, name: "x10", alt_name: nullptr, reg_byte_size: 4, data);
691 PrintRegisterValue(reg_ctx, name: "x11", alt_name: nullptr, reg_byte_size: 4, data);
692 PrintRegisterValue(reg_ctx, name: "x12", alt_name: nullptr, reg_byte_size: 4, data);
693 PrintRegisterValue(reg_ctx, name: "x13", alt_name: nullptr, reg_byte_size: 4, data);
694 PrintRegisterValue(reg_ctx, name: "x14", alt_name: nullptr, reg_byte_size: 4, data);
695 PrintRegisterValue(reg_ctx, name: "x15", alt_name: nullptr, reg_byte_size: 4, data);
696 PrintRegisterValue(reg_ctx, name: "x16", alt_name: nullptr, reg_byte_size: 4, data);
697 PrintRegisterValue(reg_ctx, name: "x17", alt_name: nullptr, reg_byte_size: 4, data);
698 PrintRegisterValue(reg_ctx, name: "x18", alt_name: nullptr, reg_byte_size: 4, data);
699 PrintRegisterValue(reg_ctx, name: "x19", alt_name: nullptr, reg_byte_size: 4, data);
700 PrintRegisterValue(reg_ctx, name: "x20", alt_name: nullptr, reg_byte_size: 4, data);
701 PrintRegisterValue(reg_ctx, name: "x21", alt_name: nullptr, reg_byte_size: 4, data);
702 PrintRegisterValue(reg_ctx, name: "x22", alt_name: nullptr, reg_byte_size: 4, data);
703 PrintRegisterValue(reg_ctx, name: "x23", alt_name: nullptr, reg_byte_size: 4, data);
704 PrintRegisterValue(reg_ctx, name: "x24", alt_name: nullptr, reg_byte_size: 4, data);
705 PrintRegisterValue(reg_ctx, name: "x25", alt_name: nullptr, reg_byte_size: 4, data);
706 PrintRegisterValue(reg_ctx, name: "x26", alt_name: nullptr, reg_byte_size: 4, data);
707 PrintRegisterValue(reg_ctx, name: "x27", alt_name: nullptr, reg_byte_size: 4, data);
708 PrintRegisterValue(reg_ctx, name: "x28", alt_name: nullptr, reg_byte_size: 4, data);
709 PrintRegisterValue(reg_ctx, name: "x29", alt_name: nullptr, reg_byte_size: 4, data);
710 PrintRegisterValue(reg_ctx, name: "x30", alt_name: nullptr, reg_byte_size: 4, data);
711 PrintRegisterValue(reg_ctx, name: "x31", alt_name: nullptr, reg_byte_size: 4, data);
712 PrintRegisterValue(reg_ctx, name: "pc", alt_name: nullptr, reg_byte_size: 4, data);
713 data.PutHex32(uvalue: 0); // uint32_t pad at the end
714
715 // Write out the EXC registers
716 data.PutHex32(uvalue: EXCRegSet);
717 data.PutHex32(uvalue: EXCWordCount);
718 PrintRegisterValue(reg_ctx, name: "exception", alt_name: nullptr, reg_byte_size: 4, data);
719 PrintRegisterValue(reg_ctx, name: "fsr", alt_name: nullptr, reg_byte_size: 4, data);
720 PrintRegisterValue(reg_ctx, name: "far", alt_name: nullptr, reg_byte_size: 4, data);
721 return true;
722 }
723 return false;
724 }
725
726protected:
727 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
728
729 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
730
731 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
732
733 int DoReadCSR(lldb::tid_t tid, int flavor, CSR &csr) override { return -1; }
734
735 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
736 return 0;
737 }
738
739 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
740 return 0;
741 }
742
743 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
744 return 0;
745 }
746
747 int DoWriteCSR(lldb::tid_t tid, int flavor, const CSR &csr) override {
748 return 0;
749 }
750};
751
752static uint32_t MachHeaderSizeFromMagic(uint32_t magic) {
753 switch (magic) {
754 case MH_MAGIC:
755 case MH_CIGAM:
756 return sizeof(struct llvm::MachO::mach_header);
757
758 case MH_MAGIC_64:
759 case MH_CIGAM_64:
760 return sizeof(struct llvm::MachO::mach_header_64);
761 break;
762
763 default:
764 break;
765 }
766 return 0;
767}
768
// Bit 0x0008 of a symbol's flags (from the n_desc field of the symbol) that
// distinguishes Thumb from ARM code; GetAddressClass() tests it for
// CPU_TYPE_ARM binaries.
#define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008

// Out-of-line definition of the static member ObjectFileMachO::ID.
// NOTE(review): presumably the identity tag used for LLVM-style RTTI on this
// class -- confirm against the declaration in ObjectFileMachO.h.
char ObjectFileMachO::ID;
772
/// Registers this plugin with the PluginManager, handing over its static name
/// and description plus the callbacks used to create an instance from a file
/// (CreateInstance), create one from memory (CreateMemoryInstance), enumerate
/// module specifications (GetModuleSpecifications) and save a core (SaveCore).
void ObjectFileMachO::Initialize() {
  PluginManager::RegisterPlugin(
      GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
      CreateMemoryInstance, GetModuleSpecifications, SaveCore);
}
778
/// Removes this plugin from the PluginManager, passing the same
/// CreateInstance callback that Initialize() registered.
void ObjectFileMachO::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
782
783ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
784 DataBufferSP data_sp,
785 lldb::offset_t data_offset,
786 const FileSpec *file,
787 lldb::offset_t file_offset,
788 lldb::offset_t length) {
789 if (!data_sp) {
790 data_sp = MapFileData(file: *file, Size: length, Offset: file_offset);
791 if (!data_sp)
792 return nullptr;
793 data_offset = 0;
794 }
795
796 if (!ObjectFileMachO::MagicBytesMatch(data_sp, offset: data_offset, length))
797 return nullptr;
798
799 // Update the data to contain the entire file if it doesn't already
800 if (data_sp->GetByteSize() < length) {
801 data_sp = MapFileData(file: *file, Size: length, Offset: file_offset);
802 if (!data_sp)
803 return nullptr;
804 data_offset = 0;
805 }
806 auto objfile_up = std::make_unique<ObjectFileMachO>(
807 args: module_sp, args&: data_sp, args&: data_offset, args&: file, args&: file_offset, args&: length);
808 if (!objfile_up || !objfile_up->ParseHeader())
809 return nullptr;
810
811 return objfile_up.release();
812}
813
814ObjectFile *ObjectFileMachO::CreateMemoryInstance(
815 const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
816 const ProcessSP &process_sp, lldb::addr_t header_addr) {
817 if (ObjectFileMachO::MagicBytesMatch(data_sp, offset: 0, length: data_sp->GetByteSize())) {
818 std::unique_ptr<ObjectFile> objfile_up(
819 new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
820 if (objfile_up.get() && objfile_up->ParseHeader())
821 return objfile_up.release();
822 }
823 return nullptr;
824}
825
826size_t ObjectFileMachO::GetModuleSpecifications(
827 const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
828 lldb::offset_t data_offset, lldb::offset_t file_offset,
829 lldb::offset_t length, lldb_private::ModuleSpecList &specs) {
830 const size_t initial_count = specs.GetSize();
831
832 if (ObjectFileMachO::MagicBytesMatch(data_sp, offset: 0, length: data_sp->GetByteSize())) {
833 DataExtractor data;
834 data.SetData(data_sp);
835 llvm::MachO::mach_header header;
836 if (ParseHeader(data, data_offset_ptr: &data_offset, header)) {
837 size_t header_and_load_cmds =
838 header.sizeofcmds + MachHeaderSizeFromMagic(magic: header.magic);
839 if (header_and_load_cmds >= data_sp->GetByteSize()) {
840 data_sp = MapFileData(file, Size: header_and_load_cmds, Offset: file_offset);
841 data.SetData(data_sp);
842 data_offset = MachHeaderSizeFromMagic(magic: header.magic);
843 }
844 if (data_sp) {
845 ModuleSpec base_spec;
846 base_spec.GetFileSpec() = file;
847 base_spec.SetObjectOffset(file_offset);
848 base_spec.SetObjectSize(length);
849 GetAllArchSpecs(header, data, lc_offset: data_offset, base_spec, all_specs&: specs);
850 }
851 }
852 }
853 return specs.GetSize() - initial_count;
854}
855
856ConstString ObjectFileMachO::GetSegmentNameTEXT() {
857 static ConstString g_segment_name_TEXT("__TEXT");
858 return g_segment_name_TEXT;
859}
860
861ConstString ObjectFileMachO::GetSegmentNameDATA() {
862 static ConstString g_segment_name_DATA("__DATA");
863 return g_segment_name_DATA;
864}
865
866ConstString ObjectFileMachO::GetSegmentNameDATA_DIRTY() {
867 static ConstString g_segment_name("__DATA_DIRTY");
868 return g_segment_name;
869}
870
871ConstString ObjectFileMachO::GetSegmentNameDATA_CONST() {
872 static ConstString g_segment_name("__DATA_CONST");
873 return g_segment_name;
874}
875
876ConstString ObjectFileMachO::GetSegmentNameOBJC() {
877 static ConstString g_segment_name_OBJC("__OBJC");
878 return g_segment_name_OBJC;
879}
880
881ConstString ObjectFileMachO::GetSegmentNameLINKEDIT() {
882 static ConstString g_section_name_LINKEDIT("__LINKEDIT");
883 return g_section_name_LINKEDIT;
884}
885
886ConstString ObjectFileMachO::GetSegmentNameDWARF() {
887 static ConstString g_section_name("__DWARF");
888 return g_section_name;
889}
890
891ConstString ObjectFileMachO::GetSegmentNameLLVM_COV() {
892 static ConstString g_section_name("__LLVM_COV");
893 return g_section_name;
894}
895
896ConstString ObjectFileMachO::GetSectionNameEHFrame() {
897 static ConstString g_section_name_eh_frame("__eh_frame");
898 return g_section_name_eh_frame;
899}
900
901ConstString ObjectFileMachO::GetSectionNameLLDBNoNlist() {
902 static ConstString g_section_name_lldb_no_nlist("__lldb_no_nlist");
903 return g_section_name_lldb_no_nlist;
904}
905
906bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp,
907 lldb::addr_t data_offset,
908 lldb::addr_t data_length) {
909 DataExtractor data;
910 data.SetData(data_sp, offset: data_offset, length: data_length);
911 lldb::offset_t offset = 0;
912 uint32_t magic = data.GetU32(offset_ptr: &offset);
913
914 offset += 4; // cputype
915 offset += 4; // cpusubtype
916 uint32_t filetype = data.GetU32(offset_ptr: &offset);
917
918 // A fileset has a Mach-O header but is not an
919 // individual file and must be handled via an
920 // ObjectContainer plugin.
921 if (filetype == llvm::MachO::MH_FILESET)
922 return false;
923
924 return MachHeaderSizeFromMagic(magic) != 0;
925}
926
/// Constructs an object file backed by data from a file on disk.
///
/// All arguments are forwarded to the ObjectFile base class. The Mach-O
/// specific state starts out empty: the cached thread-context offsets are
/// marked as not yet valid, assembly-emulation unwind plans are allowed, and
/// both m_header and m_dysymtab are zero-filled until they are parsed.
ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
                                 DataBufferSP data_sp,
                                 lldb::offset_t data_offset,
                                 const FileSpec *file,
                                 lldb::offset_t file_offset,
                                 lldb::offset_t length)
    : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
      m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
      m_thread_context_offsets_valid(false), m_reexported_dylibs(),
      m_allow_assembly_emulation_unwind_plans(true) {
  // A zero m_dysymtab.cmd is what IsStripped() treats as "not parsed yet".
  ::memset(&m_header, 0, sizeof(m_header));
  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
}
940
/// Constructs an object file backed by an image in a process's memory.
///
/// The module, process, header address and header bytes are forwarded to the
/// ObjectFile base class. The Mach-O specific state starts out empty: the
/// cached thread-context offsets are marked as not yet valid,
/// assembly-emulation unwind plans are allowed, and both m_header and
/// m_dysymtab are zero-filled until they are parsed.
ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
                                 lldb::WritableDataBufferSP header_data_sp,
                                 const lldb::ProcessSP &process_sp,
                                 lldb::addr_t header_addr)
    : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
      m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
      m_thread_context_offsets_valid(false), m_reexported_dylibs(),
      m_allow_assembly_emulation_unwind_plans(true) {
  // A zero m_dysymtab.cmd is what IsStripped() treats as "not parsed yet".
  ::memset(&m_header, 0, sizeof(m_header));
  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
}
952
953bool ObjectFileMachO::ParseHeader(DataExtractor &data,
954 lldb::offset_t *data_offset_ptr,
955 llvm::MachO::mach_header &header) {
956 data.SetByteOrder(endian::InlHostByteOrder());
957 // Leave magic in the original byte order
958 header.magic = data.GetU32(offset_ptr: data_offset_ptr);
959 bool can_parse = false;
960 bool is_64_bit = false;
961 switch (header.magic) {
962 case MH_MAGIC:
963 data.SetByteOrder(endian::InlHostByteOrder());
964 data.SetAddressByteSize(4);
965 can_parse = true;
966 break;
967
968 case MH_MAGIC_64:
969 data.SetByteOrder(endian::InlHostByteOrder());
970 data.SetAddressByteSize(8);
971 can_parse = true;
972 is_64_bit = true;
973 break;
974
975 case MH_CIGAM:
976 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
977 ? eByteOrderLittle
978 : eByteOrderBig);
979 data.SetAddressByteSize(4);
980 can_parse = true;
981 break;
982
983 case MH_CIGAM_64:
984 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
985 ? eByteOrderLittle
986 : eByteOrderBig);
987 data.SetAddressByteSize(8);
988 is_64_bit = true;
989 can_parse = true;
990 break;
991
992 default:
993 break;
994 }
995
996 if (can_parse) {
997 data.GetU32(offset_ptr: data_offset_ptr, dst: &header.cputype, count: 6);
998 if (is_64_bit)
999 *data_offset_ptr += 4;
1000 return true;
1001 } else {
1002 memset(s: &header, c: 0, n: sizeof(header));
1003 }
1004 return false;
1005}
1006
1007bool ObjectFileMachO::ParseHeader() {
1008 ModuleSP module_sp(GetModule());
1009 if (!module_sp)
1010 return false;
1011
1012 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1013 bool can_parse = false;
1014 lldb::offset_t offset = 0;
1015 m_data.SetByteOrder(endian::InlHostByteOrder());
1016 // Leave magic in the original byte order
1017 m_header.magic = m_data.GetU32(offset_ptr: &offset);
1018 switch (m_header.magic) {
1019 case MH_MAGIC:
1020 m_data.SetByteOrder(endian::InlHostByteOrder());
1021 m_data.SetAddressByteSize(4);
1022 can_parse = true;
1023 break;
1024
1025 case MH_MAGIC_64:
1026 m_data.SetByteOrder(endian::InlHostByteOrder());
1027 m_data.SetAddressByteSize(8);
1028 can_parse = true;
1029 break;
1030
1031 case MH_CIGAM:
1032 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1033 ? eByteOrderLittle
1034 : eByteOrderBig);
1035 m_data.SetAddressByteSize(4);
1036 can_parse = true;
1037 break;
1038
1039 case MH_CIGAM_64:
1040 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1041 ? eByteOrderLittle
1042 : eByteOrderBig);
1043 m_data.SetAddressByteSize(8);
1044 can_parse = true;
1045 break;
1046
1047 default:
1048 break;
1049 }
1050
1051 if (can_parse) {
1052 m_data.GetU32(offset_ptr: &offset, dst: &m_header.cputype, count: 6);
1053
1054 ModuleSpecList all_specs;
1055 ModuleSpec base_spec;
1056 GetAllArchSpecs(header: m_header, data: m_data, lc_offset: MachHeaderSizeFromMagic(magic: m_header.magic),
1057 base_spec, all_specs);
1058
1059 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1060 ArchSpec mach_arch =
1061 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture();
1062
1063 // Check if the module has a required architecture
1064 const ArchSpec &module_arch = module_sp->GetArchitecture();
1065 if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(rhs: mach_arch))
1066 continue;
1067
1068 if (SetModulesArchitecture(mach_arch)) {
1069 const size_t header_and_lc_size =
1070 m_header.sizeofcmds + MachHeaderSizeFromMagic(magic: m_header.magic);
1071 if (m_data.GetByteSize() < header_and_lc_size) {
1072 DataBufferSP data_sp;
1073 ProcessSP process_sp(m_process_wp.lock());
1074 if (process_sp) {
1075 data_sp = ReadMemory(process_sp, addr: m_memory_addr, byte_size: header_and_lc_size);
1076 } else {
1077 // Read in all only the load command data from the file on disk
1078 data_sp = MapFileData(file: m_file, Size: header_and_lc_size, Offset: m_file_offset);
1079 if (data_sp->GetByteSize() != header_and_lc_size)
1080 continue;
1081 }
1082 if (data_sp)
1083 m_data.SetData(data_sp);
1084 }
1085 }
1086 return true;
1087 }
1088 // None found.
1089 return false;
1090 } else {
1091 memset(s: &m_header, c: 0, n: sizeof(struct llvm::MachO::mach_header));
1092 }
1093 return false;
1094}
1095
/// Returns the byte order currently configured on m_data (ParseHeader() sets
/// it from the Mach-O magic).
ByteOrder ObjectFileMachO::GetByteOrder() const {
  return m_data.GetByteOrder();
}
1099
/// Returns true when the header's filetype is MH_EXECUTE.
bool ObjectFileMachO::IsExecutable() const {
  return m_header.filetype == MH_EXECUTE;
}
1103
/// Returns true when the header's filetype is MH_DYLINKER.
bool ObjectFileMachO::IsDynamicLoader() const {
  return m_header.filetype == MH_DYLINKER;
}
1107
/// Returns true when the MH_DYLIB_IN_CACHE bit is set in the header flags.
bool ObjectFileMachO::IsSharedCacheBinary() const {
  // The masked value converts to bool: nonzero means the bit is set.
  return m_header.flags & MH_DYLIB_IN_CACHE;
}
1111
/// Returns true when the header's filetype is MH_KEXT_BUNDLE.
bool ObjectFileMachO::IsKext() const {
  return m_header.filetype == MH_KEXT_BUNDLE;
}
1115
/// Returns the address size in bytes configured on m_data (ParseHeader() sets
/// it to 4 or 8 according to the Mach-O magic).
uint32_t ObjectFileMachO::GetAddressByteSize() const {
  return m_data.GetAddressByteSize();
}
1119
1120AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) {
1121 Symtab *symtab = GetSymtab();
1122 if (!symtab)
1123 return AddressClass::eUnknown;
1124
1125 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1126 if (symbol) {
1127 if (symbol->ValueIsAddress()) {
1128 SectionSP section_sp(symbol->GetAddressRef().GetSection());
1129 if (section_sp) {
1130 const lldb::SectionType section_type = section_sp->GetType();
1131 switch (section_type) {
1132 case eSectionTypeInvalid:
1133 return AddressClass::eUnknown;
1134
1135 case eSectionTypeCode:
1136 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1137 // For ARM we have a bit in the n_desc field of the symbol that
1138 // tells us ARM/Thumb which is bit 0x0008.
1139 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1140 return AddressClass::eCodeAlternateISA;
1141 }
1142 return AddressClass::eCode;
1143
1144 case eSectionTypeContainer:
1145 return AddressClass::eUnknown;
1146
1147 case eSectionTypeData:
1148 case eSectionTypeDataCString:
1149 case eSectionTypeDataCStringPointers:
1150 case eSectionTypeDataSymbolAddress:
1151 case eSectionTypeData4:
1152 case eSectionTypeData8:
1153 case eSectionTypeData16:
1154 case eSectionTypeDataPointers:
1155 case eSectionTypeZeroFill:
1156 case eSectionTypeDataObjCMessageRefs:
1157 case eSectionTypeDataObjCCFStrings:
1158 case eSectionTypeGoSymtab:
1159 return AddressClass::eData;
1160
1161 case eSectionTypeDebug:
1162 case eSectionTypeDWARFDebugAbbrev:
1163 case eSectionTypeDWARFDebugAbbrevDwo:
1164 case eSectionTypeDWARFDebugAddr:
1165 case eSectionTypeDWARFDebugAranges:
1166 case eSectionTypeDWARFDebugCuIndex:
1167 case eSectionTypeDWARFDebugFrame:
1168 case eSectionTypeDWARFDebugInfo:
1169 case eSectionTypeDWARFDebugInfoDwo:
1170 case eSectionTypeDWARFDebugLine:
1171 case eSectionTypeDWARFDebugLineStr:
1172 case eSectionTypeDWARFDebugLoc:
1173 case eSectionTypeDWARFDebugLocDwo:
1174 case eSectionTypeDWARFDebugLocLists:
1175 case eSectionTypeDWARFDebugLocListsDwo:
1176 case eSectionTypeDWARFDebugMacInfo:
1177 case eSectionTypeDWARFDebugMacro:
1178 case eSectionTypeDWARFDebugNames:
1179 case eSectionTypeDWARFDebugPubNames:
1180 case eSectionTypeDWARFDebugPubTypes:
1181 case eSectionTypeDWARFDebugRanges:
1182 case eSectionTypeDWARFDebugRngLists:
1183 case eSectionTypeDWARFDebugRngListsDwo:
1184 case eSectionTypeDWARFDebugStr:
1185 case eSectionTypeDWARFDebugStrDwo:
1186 case eSectionTypeDWARFDebugStrOffsets:
1187 case eSectionTypeDWARFDebugStrOffsetsDwo:
1188 case eSectionTypeDWARFDebugTuIndex:
1189 case eSectionTypeDWARFDebugTypes:
1190 case eSectionTypeDWARFDebugTypesDwo:
1191 case eSectionTypeDWARFAppleNames:
1192 case eSectionTypeDWARFAppleTypes:
1193 case eSectionTypeDWARFAppleNamespaces:
1194 case eSectionTypeDWARFAppleObjC:
1195 case eSectionTypeDWARFGNUDebugAltLink:
1196 case eSectionTypeCTF:
1197 case eSectionTypeLLDBTypeSummaries:
1198 case eSectionTypeLLDBFormatters:
1199 case eSectionTypeSwiftModules:
1200 return AddressClass::eDebug;
1201
1202 case eSectionTypeEHFrame:
1203 case eSectionTypeARMexidx:
1204 case eSectionTypeARMextab:
1205 case eSectionTypeCompactUnwind:
1206 return AddressClass::eRuntime;
1207
1208 case eSectionTypeAbsoluteAddress:
1209 case eSectionTypeELFSymbolTable:
1210 case eSectionTypeELFDynamicSymbols:
1211 case eSectionTypeELFRelocationEntries:
1212 case eSectionTypeELFDynamicLinkInfo:
1213 case eSectionTypeOther:
1214 return AddressClass::eUnknown;
1215 }
1216 }
1217 }
1218
1219 const SymbolType symbol_type = symbol->GetType();
1220 switch (symbol_type) {
1221 case eSymbolTypeAny:
1222 return AddressClass::eUnknown;
1223 case eSymbolTypeAbsolute:
1224 return AddressClass::eUnknown;
1225
1226 case eSymbolTypeCode:
1227 case eSymbolTypeTrampoline:
1228 case eSymbolTypeResolver:
1229 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1230 // For ARM we have a bit in the n_desc field of the symbol that tells
1231 // us ARM/Thumb which is bit 0x0008.
1232 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1233 return AddressClass::eCodeAlternateISA;
1234 }
1235 return AddressClass::eCode;
1236
1237 case eSymbolTypeData:
1238 return AddressClass::eData;
1239 case eSymbolTypeRuntime:
1240 return AddressClass::eRuntime;
1241 case eSymbolTypeException:
1242 return AddressClass::eRuntime;
1243 case eSymbolTypeSourceFile:
1244 return AddressClass::eDebug;
1245 case eSymbolTypeHeaderFile:
1246 return AddressClass::eDebug;
1247 case eSymbolTypeObjectFile:
1248 return AddressClass::eDebug;
1249 case eSymbolTypeCommonBlock:
1250 return AddressClass::eDebug;
1251 case eSymbolTypeBlock:
1252 return AddressClass::eDebug;
1253 case eSymbolTypeLocal:
1254 return AddressClass::eData;
1255 case eSymbolTypeParam:
1256 return AddressClass::eData;
1257 case eSymbolTypeVariable:
1258 return AddressClass::eData;
1259 case eSymbolTypeVariableType:
1260 return AddressClass::eDebug;
1261 case eSymbolTypeLineEntry:
1262 return AddressClass::eDebug;
1263 case eSymbolTypeLineHeader:
1264 return AddressClass::eDebug;
1265 case eSymbolTypeScopeBegin:
1266 return AddressClass::eDebug;
1267 case eSymbolTypeScopeEnd:
1268 return AddressClass::eDebug;
1269 case eSymbolTypeAdditional:
1270 return AddressClass::eUnknown;
1271 case eSymbolTypeCompiler:
1272 return AddressClass::eDebug;
1273 case eSymbolTypeInstrumentation:
1274 return AddressClass::eDebug;
1275 case eSymbolTypeUndefined:
1276 return AddressClass::eUnknown;
1277 case eSymbolTypeObjCClass:
1278 return AddressClass::eRuntime;
1279 case eSymbolTypeObjCMetaClass:
1280 return AddressClass::eRuntime;
1281 case eSymbolTypeObjCIVar:
1282 return AddressClass::eRuntime;
1283 case eSymbolTypeReExported:
1284 return AddressClass::eRuntime;
1285 }
1286 }
1287 return AddressClass::eUnknown;
1288}
1289
1290bool ObjectFileMachO::IsStripped() {
1291 if (m_dysymtab.cmd == 0) {
1292 ModuleSP module_sp(GetModule());
1293 if (module_sp) {
1294 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
1295 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1296 const lldb::offset_t load_cmd_offset = offset;
1297
1298 llvm::MachO::load_command lc = {};
1299 if (m_data.GetU32(offset_ptr: &offset, dst: &lc.cmd, count: 2) == nullptr)
1300 break;
1301 if (lc.cmd == LC_DYSYMTAB) {
1302 m_dysymtab.cmd = lc.cmd;
1303 m_dysymtab.cmdsize = lc.cmdsize;
1304 if (m_data.GetU32(offset_ptr: &offset, dst: &m_dysymtab.ilocalsym,
1305 count: (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1306 nullptr) {
1307 // Clear m_dysymtab if we were unable to read all items from the
1308 // load command
1309 ::memset(s: &m_dysymtab, c: 0, n: sizeof(m_dysymtab));
1310 }
1311 }
1312 offset = load_cmd_offset + lc.cmdsize;
1313 }
1314 }
1315 }
1316 if (m_dysymtab.cmd)
1317 return m_dysymtab.nlocalsym <= 1;
1318 return false;
1319}
1320
1321ObjectFileMachO::EncryptedFileRanges ObjectFileMachO::GetEncryptedFileRanges() {
1322 EncryptedFileRanges result;
1323 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
1324
1325 llvm::MachO::encryption_info_command encryption_cmd;
1326 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1327 const lldb::offset_t load_cmd_offset = offset;
1328 if (m_data.GetU32(offset_ptr: &offset, dst: &encryption_cmd, count: 2) == nullptr)
1329 break;
1330
1331 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1332 // 3 fields we care about, so treat them the same.
1333 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1334 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
1335 if (m_data.GetU32(offset_ptr: &offset, dst: &encryption_cmd.cryptoff, count: 3)) {
1336 if (encryption_cmd.cryptid != 0) {
1337 EncryptedFileRanges::Entry entry;
1338 entry.SetRangeBase(encryption_cmd.cryptoff);
1339 entry.SetByteSize(encryption_cmd.cryptsize);
1340 result.Append(entry);
1341 }
1342 }
1343 }
1344 offset = load_cmd_offset + encryption_cmd.cmdsize;
1345 }
1346
1347 return result;
1348}
1349
1350void ObjectFileMachO::SanitizeSegmentCommand(
1351 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
1352 if (m_length == 0 || seg_cmd.filesize == 0)
1353 return;
1354
1355 if (IsSharedCacheBinary() && !IsInMemory()) {
1356 // In shared cache images, the load commands are relative to the
1357 // shared cache file, and not the specific image we are
1358 // examining. Let's fix this up so that it looks like a normal
1359 // image.
1360 if (strncmp(s1: seg_cmd.segname, s2: GetSegmentNameTEXT().GetCString(),
1361 n: sizeof(seg_cmd.segname)) == 0)
1362 m_text_address = seg_cmd.vmaddr;
1363 if (strncmp(s1: seg_cmd.segname, s2: GetSegmentNameLINKEDIT().GetCString(),
1364 n: sizeof(seg_cmd.segname)) == 0)
1365 m_linkedit_original_offset = seg_cmd.fileoff;
1366
1367 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1368 }
1369
1370 if (seg_cmd.fileoff > m_length) {
1371 // We have a load command that says it extends past the end of the file.
1372 // This is likely a corrupt file. We don't have any way to return an error
1373 // condition here (this method was likely invoked from something like
1374 // ObjectFile::GetSectionList()), so we just null out the section contents,
1375 // and dump a message to stdout. The most common case here is core file
1376 // debugging with a truncated file.
1377 const char *lc_segment_name =
1378 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1379 GetModule()->ReportWarning(
1380 format: "load command {0} {1} has a fileoff ({2:x16}) that extends beyond "
1381 "the end of the file ({3:x16}), ignoring this section",
1382 args&: cmd_idx, args&: lc_segment_name, args&: seg_cmd.fileoff, args&: m_length);
1383
1384 seg_cmd.fileoff = 0;
1385 seg_cmd.filesize = 0;
1386 }
1387
1388 if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1389 // We have a load command that says it extends past the end of the file.
1390 // This is likely a corrupt file. We don't have any way to return an error
1391 // condition here (this method was likely invoked from something like
1392 // ObjectFile::GetSectionList()), so we just null out the section contents,
1393 // and dump a message to stdout. The most common case here is core file
1394 // debugging with a truncated file.
1395 const char *lc_segment_name =
1396 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1397 GetModule()->ReportWarning(
1398 format: "load command {0} {1} has a fileoff + filesize ({2:x16}) that "
1399 "extends beyond the end of the file ({3:x16}), the segment will be "
1400 "truncated to match",
1401 args&: cmd_idx, args&: lc_segment_name, args: seg_cmd.fileoff + seg_cmd.filesize, args&: m_length);
1402
1403 // Truncate the length
1404 seg_cmd.filesize = m_length - seg_cmd.fileoff;
1405 }
1406}
1407
1408static uint32_t
1409GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1410 uint32_t result = 0;
1411 if (seg_cmd.initprot & VM_PROT_READ)
1412 result |= ePermissionsReadable;
1413 if (seg_cmd.initprot & VM_PROT_WRITE)
1414 result |= ePermissionsWritable;
1415 if (seg_cmd.initprot & VM_PROT_EXECUTE)
1416 result |= ePermissionsExecutable;
1417 return result;
1418}
1419
1420static lldb::SectionType GetSectionType(uint32_t flags,
1421 ConstString section_name) {
1422
1423 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1424 return eSectionTypeCode;
1425
1426 uint32_t mach_sect_type = flags & SECTION_TYPE;
1427 static ConstString g_sect_name_objc_data("__objc_data");
1428 static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1429 static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1430 static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1431 static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1432 static ConstString g_sect_name_objc_const("__objc_const");
1433 static ConstString g_sect_name_objc_classlist("__objc_classlist");
1434 static ConstString g_sect_name_cfstring("__cfstring");
1435
1436 static ConstString g_sect_name_dwarf_debug_str_offs("__debug_str_offs");
1437 static ConstString g_sect_name_dwarf_debug_str_offs_dwo("__debug_str_offs.dwo");
1438 static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1439 static ConstString g_sect_name_dwarf_apple_types("__apple_types");
1440 static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1441 static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1442 static ConstString g_sect_name_eh_frame("__eh_frame");
1443 static ConstString g_sect_name_compact_unwind("__unwind_info");
1444 static ConstString g_sect_name_text("__text");
1445 static ConstString g_sect_name_data("__data");
1446 static ConstString g_sect_name_go_symtab("__gosymtab");
1447 static ConstString g_sect_name_ctf("__ctf");
1448 static ConstString g_sect_name_lldb_summaries("__lldbsummaries");
1449 static ConstString g_sect_name_lldb_formatters("__lldbformatters");
1450 static ConstString g_sect_name_swift_ast("__swift_ast");
1451
1452 if (section_name == g_sect_name_dwarf_debug_str_offs)
1453 return eSectionTypeDWARFDebugStrOffsets;
1454 if (section_name == g_sect_name_dwarf_debug_str_offs_dwo)
1455 return eSectionTypeDWARFDebugStrOffsetsDwo;
1456
1457 llvm::StringRef stripped_name = section_name.GetStringRef();
1458 if (stripped_name.consume_front(Prefix: "__debug_"))
1459 return ObjectFile::GetDWARFSectionTypeFromName(name: stripped_name);
1460
1461 if (section_name == g_sect_name_dwarf_apple_names)
1462 return eSectionTypeDWARFAppleNames;
1463 if (section_name == g_sect_name_dwarf_apple_types)
1464 return eSectionTypeDWARFAppleTypes;
1465 if (section_name == g_sect_name_dwarf_apple_namespaces)
1466 return eSectionTypeDWARFAppleNamespaces;
1467 if (section_name == g_sect_name_dwarf_apple_objc)
1468 return eSectionTypeDWARFAppleObjC;
1469 if (section_name == g_sect_name_objc_selrefs)
1470 return eSectionTypeDataCStringPointers;
1471 if (section_name == g_sect_name_objc_msgrefs)
1472 return eSectionTypeDataObjCMessageRefs;
1473 if (section_name == g_sect_name_eh_frame)
1474 return eSectionTypeEHFrame;
1475 if (section_name == g_sect_name_compact_unwind)
1476 return eSectionTypeCompactUnwind;
1477 if (section_name == g_sect_name_cfstring)
1478 return eSectionTypeDataObjCCFStrings;
1479 if (section_name == g_sect_name_go_symtab)
1480 return eSectionTypeGoSymtab;
1481 if (section_name == g_sect_name_ctf)
1482 return eSectionTypeCTF;
1483 if (section_name == g_sect_name_lldb_summaries)
1484 return lldb::eSectionTypeLLDBTypeSummaries;
1485 if (section_name == g_sect_name_lldb_formatters)
1486 return lldb::eSectionTypeLLDBFormatters;
1487 if (section_name == g_sect_name_swift_ast)
1488 return eSectionTypeSwiftModules;
1489 if (section_name == g_sect_name_objc_data ||
1490 section_name == g_sect_name_objc_classrefs ||
1491 section_name == g_sect_name_objc_superrefs ||
1492 section_name == g_sect_name_objc_const ||
1493 section_name == g_sect_name_objc_classlist) {
1494 return eSectionTypeDataPointers;
1495 }
1496
1497 switch (mach_sect_type) {
1498 // TODO: categorize sections by other flags for regular sections
1499 case S_REGULAR:
1500 if (section_name == g_sect_name_text)
1501 return eSectionTypeCode;
1502 if (section_name == g_sect_name_data)
1503 return eSectionTypeData;
1504 return eSectionTypeOther;
1505 case S_ZEROFILL:
1506 return eSectionTypeZeroFill;
1507 case S_CSTRING_LITERALS: // section with only literal C strings
1508 return eSectionTypeDataCString;
1509 case S_4BYTE_LITERALS: // section with only 4 byte literals
1510 return eSectionTypeData4;
1511 case S_8BYTE_LITERALS: // section with only 8 byte literals
1512 return eSectionTypeData8;
1513 case S_LITERAL_POINTERS: // section with only pointers to literals
1514 return eSectionTypeDataPointers;
1515 case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1516 return eSectionTypeDataPointers;
1517 case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1518 return eSectionTypeDataPointers;
1519 case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1520 // the reserved2 field
1521 return eSectionTypeCode;
1522 case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1523 // initialization
1524 return eSectionTypeDataPointers;
1525 case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1526 // termination
1527 return eSectionTypeDataPointers;
1528 case S_COALESCED:
1529 return eSectionTypeOther;
1530 case S_GB_ZEROFILL:
1531 return eSectionTypeZeroFill;
1532 case S_INTERPOSING: // section with only pairs of function pointers for
1533 // interposing
1534 return eSectionTypeCode;
1535 case S_16BYTE_LITERALS: // section with only 16 byte literals
1536 return eSectionTypeData16;
1537 case S_DTRACE_DOF:
1538 return eSectionTypeDebug;
1539 case S_LAZY_DYLIB_SYMBOL_POINTERS:
1540 return eSectionTypeDataPointers;
1541 default:
1542 return eSectionTypeOther;
1543 }
1544}
1545
1546struct ObjectFileMachO::SegmentParsingContext {
1547 const EncryptedFileRanges EncryptedRanges;
1548 lldb_private::SectionList &UnifiedList;
1549 uint32_t NextSegmentIdx = 0;
1550 uint32_t NextSectionIdx = 0;
1551 bool FileAddressesChanged = false;
1552
1553 SegmentParsingContext(EncryptedFileRanges EncryptedRanges,
1554 lldb_private::SectionList &UnifiedList)
1555 : EncryptedRanges(std::move(EncryptedRanges)), UnifiedList(UnifiedList) {}
1556};
1557
1558void ObjectFileMachO::ProcessSegmentCommand(
1559 const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1560 uint32_t cmd_idx, SegmentParsingContext &context) {
1561 llvm::MachO::segment_command_64 load_cmd;
1562 memcpy(dest: &load_cmd, src: &load_cmd_, n: sizeof(load_cmd_));
1563
1564 if (!m_data.GetU8(offset_ptr: &offset, dst: (uint8_t *)load_cmd.segname, count: 16))
1565 return;
1566
1567 ModuleSP module_sp = GetModule();
1568 const bool is_core = GetType() == eTypeCoreFile;
1569 const bool is_dsym = (m_header.filetype == MH_DSYM);
1570 bool add_section = true;
1571 bool add_to_unified = true;
1572 ConstString const_segname(
1573 load_cmd.segname, strnlen(string: load_cmd.segname, maxlen: sizeof(load_cmd.segname)));
1574
1575 SectionSP unified_section_sp(
1576 context.UnifiedList.FindSectionByName(section_dstr: const_segname));
1577 if (is_dsym && unified_section_sp) {
1578 if (const_segname == GetSegmentNameLINKEDIT()) {
1579 // We need to keep the __LINKEDIT segment private to this object file
1580 // only
1581 add_to_unified = false;
1582 } else {
1583 // This is the dSYM file and this section has already been created by the
1584 // object file, no need to create it.
1585 add_section = false;
1586 }
1587 }
1588 load_cmd.vmaddr = m_data.GetAddress(offset_ptr: &offset);
1589 load_cmd.vmsize = m_data.GetAddress(offset_ptr: &offset);
1590 load_cmd.fileoff = m_data.GetAddress(offset_ptr: &offset);
1591 load_cmd.filesize = m_data.GetAddress(offset_ptr: &offset);
1592 if (!m_data.GetU32(offset_ptr: &offset, dst: &load_cmd.maxprot, count: 4))
1593 return;
1594
1595 SanitizeSegmentCommand(seg_cmd&: load_cmd, cmd_idx);
1596
1597 const uint32_t segment_permissions = GetSegmentPermissions(seg_cmd: load_cmd);
1598 const bool segment_is_encrypted =
1599 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1600
1601 // Use a segment ID of the segment index shifted left by 8 so they never
1602 // conflict with any of the sections.
1603 SectionSP segment_sp;
1604 if (add_section && (const_segname || is_core)) {
1605 segment_sp = std::make_shared<Section>(
1606 args&: module_sp, // Module to which this section belongs
1607 args: this, // Object file to which this sections belongs
1608 args: ++context.NextSegmentIdx
1609 << 8, // Section ID is the 1 based segment index
1610 // shifted right by 8 bits as not to collide with any of the 256
1611 // section IDs that are possible
1612 args&: const_segname, // Name of this section
1613 args: eSectionTypeContainer, // This section is a container of other
1614 // sections.
1615 args&: load_cmd.vmaddr, // File VM address == addresses as they are
1616 // found in the object file
1617 args&: load_cmd.vmsize, // VM size in bytes of this section
1618 args&: load_cmd.fileoff, // Offset to the data for this section in
1619 // the file
1620 args&: load_cmd.filesize, // Size in bytes of this section as found
1621 // in the file
1622 args: 0, // Segments have no alignment information
1623 args&: load_cmd.flags); // Flags for this section
1624
1625 segment_sp->SetIsEncrypted(segment_is_encrypted);
1626 m_sections_up->AddSection(section_sp: segment_sp);
1627 segment_sp->SetPermissions(segment_permissions);
1628 if (add_to_unified)
1629 context.UnifiedList.AddSection(section_sp: segment_sp);
1630 } else if (unified_section_sp) {
1631 // If this is a dSYM and the file addresses in the dSYM differ from the
1632 // file addresses in the ObjectFile, we must use the file base address for
1633 // the Section from the dSYM for the DWARF to resolve correctly.
1634 // This only happens with binaries in the shared cache in practice;
1635 // normally a mismatch like this would give a binary & dSYM that do not
1636 // match UUIDs. When a binary is included in the shared cache, its
1637 // segments are rearranged to optimize the shared cache, so its file
1638 // addresses will differ from what the ObjectFile had originally,
1639 // and what the dSYM has.
1640 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1641 Log *log = GetLog(mask: LLDBLog::Symbols);
1642 if (log) {
1643 log->Printf(
1644 format: "Installing dSYM's %s segment file address over ObjectFile's "
1645 "so symbol table/debug info resolves correctly for %s",
1646 const_segname.AsCString(),
1647 module_sp->GetFileSpec().GetFilename().AsCString());
1648 }
1649
1650 // Make sure we've parsed the symbol table from the ObjectFile before
1651 // we go around changing its Sections.
1652 module_sp->GetObjectFile()->GetSymtab();
1653 // eh_frame would present the same problems but we parse that on a per-
1654 // function basis as-needed so it's more difficult to remove its use of
1655 // the Sections. Realistically, the environments where this code path
1656 // will be taken will not have eh_frame sections.
1657
1658 unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1659
1660 // Notify the module that the section addresses have been changed once
1661 // we're done so any file-address caches can be updated.
1662 context.FileAddressesChanged = true;
1663 }
1664 m_sections_up->AddSection(section_sp: unified_section_sp);
1665 }
1666
1667 llvm::MachO::section_64 sect64;
1668 ::memset(s: &sect64, c: 0, n: sizeof(sect64));
1669 // Push a section into our mach sections for the section at index zero
1670 // (NO_SECT) if we don't have any mach sections yet...
1671 if (m_mach_sections.empty())
1672 m_mach_sections.push_back(x: sect64);
1673 uint32_t segment_sect_idx;
1674 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1675
1676 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1677 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1678 ++segment_sect_idx) {
1679 if (m_data.GetU8(offset_ptr: &offset, dst: (uint8_t *)sect64.sectname,
1680 count: sizeof(sect64.sectname)) == nullptr)
1681 break;
1682 if (m_data.GetU8(offset_ptr: &offset, dst: (uint8_t *)sect64.segname,
1683 count: sizeof(sect64.segname)) == nullptr)
1684 break;
1685 sect64.addr = m_data.GetAddress(offset_ptr: &offset);
1686 sect64.size = m_data.GetAddress(offset_ptr: &offset);
1687
1688 if (m_data.GetU32(offset_ptr: &offset, dst: &sect64.offset, count: num_u32s) == nullptr)
1689 break;
1690
1691 if (IsSharedCacheBinary() && !IsInMemory()) {
1692 sect64.offset = sect64.addr - m_text_address;
1693 }
1694
1695 // Keep a list of mach sections around in case we need to get at data that
1696 // isn't stored in the abstracted Sections.
1697 m_mach_sections.push_back(x: sect64);
1698
1699 if (add_section) {
1700 ConstString section_name(
1701 sect64.sectname, strnlen(string: sect64.sectname, maxlen: sizeof(sect64.sectname)));
1702 if (!const_segname) {
1703 // We have a segment with no name so we need to conjure up segments
1704 // that correspond to the section's segname if there isn't already such
1705 // a section. If there is such a section, we resize the section so that
1706 // it spans all sections. We also mark these sections as fake so
1707 // address matches don't hit if they land in the gaps between the child
1708 // sections.
1709 const_segname.SetTrimmedCStringWithLength(cstr: sect64.segname,
1710 fixed_cstr_len: sizeof(sect64.segname));
1711 segment_sp = context.UnifiedList.FindSectionByName(section_dstr: const_segname);
1712 if (segment_sp.get()) {
1713 Section *segment = segment_sp.get();
1714 // Grow the section size as needed.
1715 const lldb::addr_t sect64_min_addr = sect64.addr;
1716 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1717 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1718 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1719 const lldb::addr_t curr_seg_max_addr =
1720 curr_seg_min_addr + curr_seg_byte_size;
1721 if (sect64_min_addr >= curr_seg_min_addr) {
1722 const lldb::addr_t new_seg_byte_size =
1723 sect64_max_addr - curr_seg_min_addr;
1724 // Only grow the section size if needed
1725 if (new_seg_byte_size > curr_seg_byte_size)
1726 segment->SetByteSize(new_seg_byte_size);
1727 } else {
1728 // We need to change the base address of the segment and adjust the
1729 // child section offsets for all existing children.
1730 const lldb::addr_t slide_amount =
1731 sect64_min_addr - curr_seg_min_addr;
1732 segment->Slide(slide_amount, slide_children: false);
1733 segment->GetChildren().Slide(slide_amount: -slide_amount, slide_children: false);
1734 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1735 }
1736
1737 // Grow the section size as needed.
1738 if (sect64.offset) {
1739 const lldb::addr_t segment_min_file_offset =
1740 segment->GetFileOffset();
1741 const lldb::addr_t segment_max_file_offset =
1742 segment_min_file_offset + segment->GetFileSize();
1743
1744 const lldb::addr_t section_min_file_offset = sect64.offset;
1745 const lldb::addr_t section_max_file_offset =
1746 section_min_file_offset + sect64.size;
1747 const lldb::addr_t new_file_offset =
1748 std::min(a: section_min_file_offset, b: segment_min_file_offset);
1749 const lldb::addr_t new_file_size =
1750 std::max(a: section_max_file_offset, b: segment_max_file_offset) -
1751 new_file_offset;
1752 segment->SetFileOffset(new_file_offset);
1753 segment->SetFileSize(new_file_size);
1754 }
1755 } else {
1756 // Create a fake section for the section's named segment
1757 segment_sp = std::make_shared<Section>(
1758 args&: segment_sp, // Parent section
1759 args&: module_sp, // Module to which this section belongs
1760 args: this, // Object file to which this section belongs
1761 args: ++context.NextSegmentIdx
1762 << 8, // Section ID is the 1 based segment index
1763 // shifted right by 8 bits as not to
1764 // collide with any of the 256 section IDs
1765 // that are possible
1766 args&: const_segname, // Name of this section
1767 args: eSectionTypeContainer, // This section is a container of
1768 // other sections.
1769 args&: sect64.addr, // File VM address == addresses as they are
1770 // found in the object file
1771 args&: sect64.size, // VM size in bytes of this section
1772 args&: sect64.offset, // Offset to the data for this section in
1773 // the file
1774 args: sect64.offset ? sect64.size : 0, // Size in bytes of
1775 // this section as
1776 // found in the file
1777 args&: sect64.align,
1778 args&: load_cmd.flags); // Flags for this section
1779 segment_sp->SetIsFake(true);
1780 segment_sp->SetPermissions(segment_permissions);
1781 m_sections_up->AddSection(section_sp: segment_sp);
1782 if (add_to_unified)
1783 context.UnifiedList.AddSection(section_sp: segment_sp);
1784 segment_sp->SetIsEncrypted(segment_is_encrypted);
1785 }
1786 }
1787 assert(segment_sp.get());
1788
1789 lldb::SectionType sect_type = GetSectionType(flags: sect64.flags, section_name);
1790
1791 SectionSP section_sp(new Section(
1792 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1793 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1794 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1795 sect64.flags));
1796 // Set the section to be encrypted to match the segment
1797
1798 bool section_is_encrypted = false;
1799 if (!segment_is_encrypted && load_cmd.filesize != 0)
1800 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1801 addr: sect64.offset) != nullptr;
1802
1803 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1804 section_sp->SetPermissions(segment_permissions);
1805 segment_sp->GetChildren().AddSection(section_sp);
1806
1807 if (segment_sp->IsFake()) {
1808 segment_sp.reset();
1809 const_segname.Clear();
1810 }
1811 }
1812 }
1813 if (segment_sp && is_dsym) {
1814 if (first_segment_sectID <= context.NextSectionIdx) {
1815 lldb::user_id_t sect_uid;
1816 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1817 ++sect_uid) {
1818 SectionSP curr_section_sp(
1819 segment_sp->GetChildren().FindSectionByID(sect_id: sect_uid));
1820 SectionSP next_section_sp;
1821 if (sect_uid + 1 <= context.NextSectionIdx)
1822 next_section_sp =
1823 segment_sp->GetChildren().FindSectionByID(sect_id: sect_uid + 1);
1824
1825 if (curr_section_sp.get()) {
1826 if (curr_section_sp->GetByteSize() == 0) {
1827 if (next_section_sp.get() != nullptr)
1828 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1829 curr_section_sp->GetFileAddress());
1830 else
1831 curr_section_sp->SetByteSize(load_cmd.vmsize);
1832 }
1833 }
1834 }
1835 }
1836 }
1837}
1838
1839void ObjectFileMachO::ProcessDysymtabCommand(
1840 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
1841 m_dysymtab.cmd = load_cmd.cmd;
1842 m_dysymtab.cmdsize = load_cmd.cmdsize;
1843 m_data.GetU32(offset_ptr: &offset, dst: &m_dysymtab.ilocalsym,
1844 count: (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1845}
1846
1847void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
1848 if (m_sections_up)
1849 return;
1850
1851 m_sections_up = std::make_unique<SectionList>();
1852
1853 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
1854 // bool dump_sections = false;
1855 ModuleSP module_sp(GetModule());
1856
1857 offset = MachHeaderSizeFromMagic(magic: m_header.magic);
1858
1859 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1860 llvm::MachO::load_command load_cmd;
1861 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1862 const lldb::offset_t load_cmd_offset = offset;
1863 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
1864 break;
1865
1866 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1867 ProcessSegmentCommand(load_cmd_: load_cmd, offset, cmd_idx: i, context);
1868 else if (load_cmd.cmd == LC_DYSYMTAB)
1869 ProcessDysymtabCommand(load_cmd, offset);
1870
1871 offset = load_cmd_offset + load_cmd.cmdsize;
1872 }
1873
1874 if (context.FileAddressesChanged && module_sp)
1875 module_sp->SectionFileAddressesChanged();
1876}
1877
1878class MachSymtabSectionInfo {
1879public:
1880 MachSymtabSectionInfo(SectionList *section_list)
1881 : m_section_list(section_list), m_section_infos() {
1882 // Get the number of sections down to a depth of 1 to include all segments
1883 // and their sections, but no other sections that may be added for debug
1884 // map or
1885 m_section_infos.resize(new_size: section_list->GetNumSections(depth: 1));
1886 }
1887
1888 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1889 if (n_sect == 0)
1890 return SectionSP();
1891 if (n_sect < m_section_infos.size()) {
1892 if (!m_section_infos[n_sect].section_sp) {
1893 SectionSP section_sp(m_section_list->FindSectionByID(sect_id: n_sect));
1894 m_section_infos[n_sect].section_sp = section_sp;
1895 if (section_sp) {
1896 m_section_infos[n_sect].vm_range.SetBaseAddress(
1897 section_sp->GetFileAddress());
1898 m_section_infos[n_sect].vm_range.SetByteSize(
1899 section_sp->GetByteSize());
1900 } else {
1901 std::string filename = "<unknown>";
1902 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(idx: 0));
1903 if (first_section_sp)
1904 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1905
1906 Debugger::ReportError(
1907 message: llvm::formatv(Fmt: "unable to find section {0} for a symbol in "
1908 "{1}, corrupt file?",
1909 Vals&: n_sect, Vals&: filename));
1910 }
1911 }
1912 if (m_section_infos[n_sect].vm_range.Contains(addr: file_addr)) {
1913 // Symbol is in section.
1914 return m_section_infos[n_sect].section_sp;
1915 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1916 m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1917 file_addr) {
1918 // Symbol is in section with zero size, but has the same start address
1919 // as the section. This can happen with linker symbols (symbols that
1920 // start with the letter 'l' or 'L'.
1921 return m_section_infos[n_sect].section_sp;
1922 }
1923 }
1924 return m_section_list->FindSectionContainingFileAddress(addr: file_addr);
1925 }
1926
1927protected:
1928 struct SectionInfo {
1929 SectionInfo() : vm_range(), section_sp() {}
1930
1931 VMRange vm_range;
1932 SectionSP section_sp;
1933 };
1934 SectionList *m_section_list;
1935 std::vector<SectionInfo> m_section_infos;
1936};
1937
1938#define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
1939struct TrieEntry {
1940 void Dump() const {
1941 printf(format: "0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
1942 static_cast<unsigned long long>(address),
1943 static_cast<unsigned long long>(flags),
1944 static_cast<unsigned long long>(other), name.GetCString());
1945 if (import_name)
1946 printf(format: " -> \"%s\"\n", import_name.GetCString());
1947 else
1948 printf(format: "\n");
1949 }
1950 ConstString name;
1951 uint64_t address = LLDB_INVALID_ADDRESS;
1952 uint64_t flags =
1953 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
1954 // TRIE_SYMBOL_IS_THUMB
1955 uint64_t other = 0;
1956 ConstString import_name;
1957};
1958
1959struct TrieEntryWithOffset {
1960 lldb::offset_t nodeOffset;
1961 TrieEntry entry;
1962
1963 TrieEntryWithOffset(lldb::offset_t offset) : nodeOffset(offset), entry() {}
1964
1965 void Dump(uint32_t idx) const {
1966 printf(format: "[%3u] 0x%16.16llx: ", idx,
1967 static_cast<unsigned long long>(nodeOffset));
1968 entry.Dump();
1969 }
1970
1971 bool operator<(const TrieEntryWithOffset &other) const {
1972 return (nodeOffset < other.nodeOffset);
1973 }
1974};
1975
1976static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset,
1977 const bool is_arm, addr_t text_seg_base_addr,
1978 std::vector<llvm::StringRef> &nameSlices,
1979 std::set<lldb::addr_t> &resolver_addresses,
1980 std::vector<TrieEntryWithOffset> &reexports,
1981 std::vector<TrieEntryWithOffset> &ext_symbols) {
1982 if (!data.ValidOffset(offset))
1983 return true;
1984
1985 // Terminal node -- end of a branch, possibly add this to
1986 // the symbol table or resolver table.
1987 const uint64_t terminalSize = data.GetULEB128(offset_ptr: &offset);
1988 lldb::offset_t children_offset = offset + terminalSize;
1989 if (terminalSize != 0) {
1990 TrieEntryWithOffset e(offset);
1991 e.entry.flags = data.GetULEB128(offset_ptr: &offset);
1992 const char *import_name = nullptr;
1993 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1994 e.entry.address = 0;
1995 e.entry.other = data.GetULEB128(offset_ptr: &offset); // dylib ordinal
1996 import_name = data.GetCStr(offset_ptr: &offset);
1997 } else {
1998 e.entry.address = data.GetULEB128(offset_ptr: &offset);
1999 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2000 e.entry.address += text_seg_base_addr;
2001 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
2002 e.entry.other = data.GetULEB128(offset_ptr: &offset);
2003 uint64_t resolver_addr = e.entry.other;
2004 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2005 resolver_addr += text_seg_base_addr;
2006 if (is_arm)
2007 resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2008 resolver_addresses.insert(x: resolver_addr);
2009 } else
2010 e.entry.other = 0;
2011 }
2012 bool add_this_entry = false;
2013 if (Flags(e.entry.flags).Test(bit: EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2014 import_name && import_name[0]) {
2015 // add symbols that are reexport symbols with a valid import name.
2016 add_this_entry = true;
2017 } else if (e.entry.flags == 0 &&
2018 (import_name == nullptr || import_name[0] == '\0')) {
2019 // add externally visible symbols, in case the nlist record has
2020 // been stripped/omitted.
2021 add_this_entry = true;
2022 }
2023 if (add_this_entry) {
2024 std::string name;
2025 if (!nameSlices.empty()) {
2026 for (auto name_slice : nameSlices)
2027 name.append(s: name_slice.data(), n: name_slice.size());
2028 }
2029 if (name.size() > 1) {
2030 // Skip the leading '_'
2031 e.entry.name.SetCStringWithLength(cstr: name.c_str() + 1, cstr_len: name.size() - 1);
2032 }
2033 if (import_name) {
2034 // Skip the leading '_'
2035 e.entry.import_name.SetCString(import_name + 1);
2036 }
2037 if (Flags(e.entry.flags).Test(bit: EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2038 reexports.push_back(x: e);
2039 } else {
2040 if (is_arm && (e.entry.address & 1)) {
2041 e.entry.flags |= TRIE_SYMBOL_IS_THUMB;
2042 e.entry.address &= THUMB_ADDRESS_BIT_MASK;
2043 }
2044 ext_symbols.push_back(x: e);
2045 }
2046 }
2047 }
2048
2049 const uint8_t childrenCount = data.GetU8(offset_ptr: &children_offset);
2050 for (uint8_t i = 0; i < childrenCount; ++i) {
2051 const char *cstr = data.GetCStr(offset_ptr: &children_offset);
2052 if (cstr)
2053 nameSlices.push_back(x: llvm::StringRef(cstr));
2054 else
2055 return false; // Corrupt data
2056 lldb::offset_t childNodeOffset = data.GetULEB128(offset_ptr: &children_offset);
2057 if (childNodeOffset) {
2058 if (!ParseTrieEntries(data, offset: childNodeOffset, is_arm, text_seg_base_addr,
2059 nameSlices, resolver_addresses, reexports,
2060 ext_symbols)) {
2061 return false;
2062 }
2063 }
2064 nameSlices.pop_back();
2065 }
2066 return true;
2067}
2068
2069static SymbolType GetSymbolType(const char *&symbol_name,
2070 bool &demangled_is_synthesized,
2071 const SectionSP &text_section_sp,
2072 const SectionSP &data_section_sp,
2073 const SectionSP &data_dirty_section_sp,
2074 const SectionSP &data_const_section_sp,
2075 const SectionSP &symbol_section) {
2076 SymbolType type = eSymbolTypeInvalid;
2077
2078 const char *symbol_sect_name = symbol_section->GetName().AsCString();
2079 if (symbol_section->IsDescendant(section: text_section_sp.get())) {
2080 if (symbol_section->IsClear(bit: S_ATTR_PURE_INSTRUCTIONS |
2081 S_ATTR_SELF_MODIFYING_CODE |
2082 S_ATTR_SOME_INSTRUCTIONS))
2083 type = eSymbolTypeData;
2084 else
2085 type = eSymbolTypeCode;
2086 } else if (symbol_section->IsDescendant(section: data_section_sp.get()) ||
2087 symbol_section->IsDescendant(section: data_dirty_section_sp.get()) ||
2088 symbol_section->IsDescendant(section: data_const_section_sp.get())) {
2089 if (symbol_sect_name &&
2090 ::strstr(haystack: symbol_sect_name, needle: "__objc") == symbol_sect_name) {
2091 type = eSymbolTypeRuntime;
2092
2093 if (symbol_name) {
2094 llvm::StringRef symbol_name_ref(symbol_name);
2095 if (symbol_name_ref.starts_with(Prefix: "OBJC_")) {
2096 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2097 static const llvm::StringRef g_objc_v2_prefix_metaclass(
2098 "OBJC_METACLASS_$_");
2099 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2100 if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_class)) {
2101 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2102 type = eSymbolTypeObjCClass;
2103 demangled_is_synthesized = true;
2104 } else if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_metaclass)) {
2105 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2106 type = eSymbolTypeObjCMetaClass;
2107 demangled_is_synthesized = true;
2108 } else if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_ivar)) {
2109 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2110 type = eSymbolTypeObjCIVar;
2111 demangled_is_synthesized = true;
2112 }
2113 }
2114 }
2115 } else if (symbol_sect_name &&
2116 ::strstr(haystack: symbol_sect_name, needle: "__gcc_except_tab") ==
2117 symbol_sect_name) {
2118 type = eSymbolTypeException;
2119 } else {
2120 type = eSymbolTypeData;
2121 }
2122 } else if (symbol_sect_name &&
2123 ::strstr(haystack: symbol_sect_name, needle: "__IMPORT") == symbol_sect_name) {
2124 type = eSymbolTypeTrampoline;
2125 }
2126 return type;
2127}
2128
2129static std::optional<struct nlist_64>
2130ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2131 size_t nlist_byte_size) {
2132 struct nlist_64 nlist;
2133 if (!nlist_data.ValidOffsetForDataOfSize(offset: nlist_data_offset, length: nlist_byte_size))
2134 return {};
2135 nlist.n_strx = nlist_data.GetU32_unchecked(offset_ptr: &nlist_data_offset);
2136 nlist.n_type = nlist_data.GetU8_unchecked(offset_ptr: &nlist_data_offset);
2137 nlist.n_sect = nlist_data.GetU8_unchecked(offset_ptr: &nlist_data_offset);
2138 nlist.n_desc = nlist_data.GetU16_unchecked(offset_ptr: &nlist_data_offset);
2139 nlist.n_value = nlist_data.GetAddress_unchecked(offset_ptr: &nlist_data_offset);
2140 return nlist;
2141}
2142
// Named stand-ins for the boolean values true and false.
// NOTE(review): the code that uses these constants is outside this excerpt;
// presumably they are passed where a flag distinguishes debug symbols from
// non-debug symbols -- confirm against the callers.
enum { DebugSymbols = true, NonDebugSymbols = false };
2144
2145void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
2146 ModuleSP module_sp(GetModule());
2147 if (!module_sp)
2148 return;
2149
2150 Log *log = GetLog(mask: LLDBLog::Symbols);
2151
2152 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec();
2153 const char *file_name = file.GetFilename().AsCString(value_if_empty: "<Unknown>");
2154 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name);
2155 LLDB_LOG(log, "Parsing symbol table for {0}", file_name);
2156 Progress progress("Parsing symbol table", file_name);
2157
2158 llvm::MachO::linkedit_data_command function_starts_load_command = {.cmd: 0, .cmdsize: 0, .dataoff: 0, .datasize: 0};
2159 llvm::MachO::linkedit_data_command exports_trie_load_command = {.cmd: 0, .cmdsize: 0, .dataoff: 0, .datasize: 0};
2160 llvm::MachO::dyld_info_command dyld_info = {.cmd: 0, .cmdsize: 0, .rebase_off: 0, .rebase_size: 0, .bind_off: 0, .bind_size: 0, .weak_bind_off: 0, .weak_bind_size: 0, .lazy_bind_off: 0, .lazy_bind_size: 0, .export_off: 0, .export_size: 0};
2161 llvm::MachO::dysymtab_command dysymtab = m_dysymtab;
2162 SymtabCommandLargeOffsets symtab_load_command;
2163 // The data element of type bool indicates that this entry is thumb
2164 // code.
2165 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2166
2167 // Record the address of every function/data that we add to the symtab.
2168 // We add symbols to the table in the order of most information (nlist
2169 // records) to least (function starts), and avoid duplicating symbols
2170 // via this set.
2171 llvm::DenseSet<addr_t> symbols_added;
2172
2173 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2174 // do not add the tombstone or empty keys to the set.
2175 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2176 // Don't add the tombstone or empty keys.
2177 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2178 return;
2179 symbols_added.insert(V: file_addr);
2180 };
2181 FunctionStarts function_starts;
2182 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
2183 uint32_t i;
2184 FileSpecList dylib_files;
2185 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2186 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2187 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2188 UUID image_uuid;
2189
2190 for (i = 0; i < m_header.ncmds; ++i) {
2191 const lldb::offset_t cmd_offset = offset;
2192 // Read in the load command and load command size
2193 llvm::MachO::load_command lc;
2194 if (m_data.GetU32(offset_ptr: &offset, dst: &lc, count: 2) == nullptr)
2195 break;
2196 // Watch for the symbol table load command
2197 switch (lc.cmd) {
2198 case LC_SYMTAB:
2199 // struct symtab_command {
2200 // uint32_t cmd; /* LC_SYMTAB */
2201 // uint32_t cmdsize; /* sizeof(struct symtab_command) */
2202 // uint32_t symoff; /* symbol table offset */
2203 // uint32_t nsyms; /* number of symbol table entries */
2204 // uint32_t stroff; /* string table offset */
2205 // uint32_t strsize; /* string table size in bytes */
2206 // };
2207 symtab_load_command.cmd = lc.cmd;
2208 symtab_load_command.cmdsize = lc.cmdsize;
2209 symtab_load_command.symoff = m_data.GetU32(offset_ptr: &offset);
2210 symtab_load_command.nsyms = m_data.GetU32(offset_ptr: &offset);
2211 symtab_load_command.stroff = m_data.GetU32(offset_ptr: &offset);
2212 symtab_load_command.strsize = m_data.GetU32(offset_ptr: &offset);
2213 break;
2214
2215 case LC_DYLD_INFO:
2216 case LC_DYLD_INFO_ONLY:
2217 if (m_data.GetU32(offset_ptr: &offset, dst: &dyld_info.rebase_off, count: 10)) {
2218 dyld_info.cmd = lc.cmd;
2219 dyld_info.cmdsize = lc.cmdsize;
2220 } else {
2221 memset(s: &dyld_info, c: 0, n: sizeof(dyld_info));
2222 }
2223 break;
2224
2225 case LC_LOAD_DYLIB:
2226 case LC_LOAD_WEAK_DYLIB:
2227 case LC_REEXPORT_DYLIB:
2228 case LC_LOADFVMLIB:
2229 case LC_LOAD_UPWARD_DYLIB: {
2230 uint32_t name_offset = cmd_offset + m_data.GetU32(offset_ptr: &offset);
2231 const char *path = m_data.PeekCStr(offset: name_offset);
2232 if (path) {
2233 FileSpec file_spec(path);
2234 // Strip the path if there is @rpath, @executable, etc so we just use
2235 // the basename
2236 if (path[0] == '@')
2237 file_spec.ClearDirectory();
2238
2239 if (lc.cmd == LC_REEXPORT_DYLIB) {
2240 m_reexported_dylibs.AppendIfUnique(file: file_spec);
2241 }
2242
2243 dylib_files.Append(file: file_spec);
2244 }
2245 } break;
2246
2247 case LC_DYLD_EXPORTS_TRIE:
2248 exports_trie_load_command.cmd = lc.cmd;
2249 exports_trie_load_command.cmdsize = lc.cmdsize;
2250 if (m_data.GetU32(offset_ptr: &offset, dst: &exports_trie_load_command.dataoff, count: 2) ==
2251 nullptr) // fill in offset and size fields
2252 memset(s: &exports_trie_load_command, c: 0,
2253 n: sizeof(exports_trie_load_command));
2254 break;
2255 case LC_FUNCTION_STARTS:
2256 function_starts_load_command.cmd = lc.cmd;
2257 function_starts_load_command.cmdsize = lc.cmdsize;
2258 if (m_data.GetU32(offset_ptr: &offset, dst: &function_starts_load_command.dataoff, count: 2) ==
2259 nullptr) // fill in data offset and size fields
2260 memset(s: &function_starts_load_command, c: 0,
2261 n: sizeof(function_starts_load_command));
2262 break;
2263
2264 case LC_UUID: {
2265 const uint8_t *uuid_bytes = m_data.PeekData(offset, length: 16);
2266
2267 if (uuid_bytes)
2268 image_uuid = UUID(uuid_bytes, 16);
2269 break;
2270 }
2271
2272 default:
2273 break;
2274 }
2275 offset = cmd_offset + lc.cmdsize;
2276 }
2277
2278 if (!symtab_load_command.cmd)
2279 return;
2280
2281 SectionList *section_list = GetSectionList();
2282 if (section_list == nullptr)
2283 return;
2284
2285 const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2286 const ByteOrder byte_order = m_data.GetByteOrder();
2287 bool bit_width_32 = addr_byte_size == 4;
2288 const size_t nlist_byte_size =
2289 bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2290
2291 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2292 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2293 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2294 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2295 addr_byte_size);
2296 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2297
2298 const addr_t nlist_data_byte_size =
2299 symtab_load_command.nsyms * nlist_byte_size;
2300 const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2301 addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2302
2303 ProcessSP process_sp(m_process_wp.lock());
2304 Process *process = process_sp.get();
2305
2306 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2307 bool is_shared_cache_image = IsSharedCacheBinary();
2308 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2309
2310 ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2311 ConstString g_segment_name_DATA = GetSegmentNameDATA();
2312 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2313 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2314 ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2315 ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2316 ConstString g_section_name_lldb_no_nlist = GetSectionNameLLDBNoNlist();
2317 SectionSP text_section_sp(
2318 section_list->FindSectionByName(section_dstr: g_segment_name_TEXT));
2319 SectionSP data_section_sp(
2320 section_list->FindSectionByName(section_dstr: g_segment_name_DATA));
2321 SectionSP linkedit_section_sp(
2322 section_list->FindSectionByName(section_dstr: GetSegmentNameLINKEDIT()));
2323 SectionSP data_dirty_section_sp(
2324 section_list->FindSectionByName(section_dstr: g_segment_name_DATA_DIRTY));
2325 SectionSP data_const_section_sp(
2326 section_list->FindSectionByName(section_dstr: g_segment_name_DATA_CONST));
2327 SectionSP objc_section_sp(
2328 section_list->FindSectionByName(section_dstr: g_segment_name_OBJC));
2329 SectionSP eh_frame_section_sp;
2330 SectionSP lldb_no_nlist_section_sp;
2331 if (text_section_sp.get()) {
2332 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2333 section_dstr: g_section_name_eh_frame);
2334 lldb_no_nlist_section_sp = text_section_sp->GetChildren().FindSectionByName(
2335 section_dstr: g_section_name_lldb_no_nlist);
2336 } else {
2337 eh_frame_section_sp =
2338 section_list->FindSectionByName(section_dstr: g_section_name_eh_frame);
2339 lldb_no_nlist_section_sp =
2340 section_list->FindSectionByName(section_dstr: g_section_name_lldb_no_nlist);
2341 }
2342
2343 if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2344 !is_local_shared_cache_image) {
2345 Target &target = process->GetTarget();
2346
2347 memory_module_load_level = target.GetMemoryModuleLoadLevel();
2348
2349 // If __TEXT,__lldb_no_nlist section is present in this binary,
2350 // and we're reading it out of memory, do not read any of the
2351 // nlist entries. They are not needed in lldb and it may be
2352 // expensive to load these. This is to handle a dylib consisting
2353 // of only metadata, no code, but it has many nlist entries.
2354 if (lldb_no_nlist_section_sp)
2355 memory_module_load_level = eMemoryModuleLoadLevelMinimal;
2356
2357 // Reading mach file from memory in a process or core file...
2358
2359 if (linkedit_section_sp) {
2360 addr_t linkedit_load_addr =
2361 linkedit_section_sp->GetLoadBaseAddress(target: &target);
2362 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2363 // We might be trying to access the symbol table before the
2364 // __LINKEDIT's load address has been set in the target. We can't
2365 // fail to read the symbol table, so calculate the right address
2366 // manually
2367 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2368 mach_header_load_address: m_memory_addr, mach_header_section: GetMachHeaderSection(), section: linkedit_section_sp.get());
2369 }
2370
2371 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2372 const addr_t symoff_addr = linkedit_load_addr +
2373 symtab_load_command.symoff -
2374 linkedit_file_offset;
2375 strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2376 linkedit_file_offset;
2377
2378 // Always load dyld - the dynamic linker - from memory if we didn't
2379 // find a binary anywhere else. lldb will not register
2380 // dylib/framework/bundle loads/unloads if we don't have the dyld
2381 // symbols, we force dyld to load from memory despite the user's
2382 // target.memory-module-load-level setting.
2383 if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2384 m_header.filetype == llvm::MachO::MH_DYLINKER) {
2385 DataBufferSP nlist_data_sp(
2386 ReadMemory(process_sp, addr: symoff_addr, byte_size: nlist_data_byte_size));
2387 if (nlist_data_sp)
2388 nlist_data.SetData(data_sp: nlist_data_sp, offset: 0, length: nlist_data_sp->GetByteSize());
2389 if (dysymtab.nindirectsyms != 0) {
2390 const addr_t indirect_syms_addr = linkedit_load_addr +
2391 dysymtab.indirectsymoff -
2392 linkedit_file_offset;
2393 DataBufferSP indirect_syms_data_sp(ReadMemory(
2394 process_sp, addr: indirect_syms_addr, byte_size: dysymtab.nindirectsyms * 4));
2395 if (indirect_syms_data_sp)
2396 indirect_symbol_index_data.SetData(
2397 data_sp: indirect_syms_data_sp, offset: 0, length: indirect_syms_data_sp->GetByteSize());
2398 // If this binary is outside the shared cache,
2399 // cache the string table.
2400 // Binaries in the shared cache all share a giant string table,
2401 // and we can't share the string tables across multiple
2402 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2403 // for every binary in the shared cache - it would be a big perf
2404 // problem. For binaries outside the shared cache, it's faster to
2405 // read the entire strtab at once instead of piece-by-piece as we
2406 // process the nlist records.
2407 if (!is_shared_cache_image) {
2408 DataBufferSP strtab_data_sp(
2409 ReadMemory(process_sp, addr: strtab_addr, byte_size: strtab_data_byte_size));
2410 if (strtab_data_sp) {
2411 strtab_data.SetData(data_sp: strtab_data_sp, offset: 0,
2412 length: strtab_data_sp->GetByteSize());
2413 }
2414 }
2415 }
2416 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2417 if (function_starts_load_command.cmd) {
2418 const addr_t func_start_addr =
2419 linkedit_load_addr + function_starts_load_command.dataoff -
2420 linkedit_file_offset;
2421 DataBufferSP func_start_data_sp(
2422 ReadMemory(process_sp, addr: func_start_addr,
2423 byte_size: function_starts_load_command.datasize));
2424 if (func_start_data_sp)
2425 function_starts_data.SetData(data_sp: func_start_data_sp, offset: 0,
2426 length: func_start_data_sp->GetByteSize());
2427 }
2428 }
2429 }
2430 }
2431 } else {
2432 if (is_local_shared_cache_image) {
2433 // The load commands in shared cache images are relative to the
2434 // beginning of the shared cache, not the library image. The
2435 // data we get handed when creating the ObjectFileMachO starts
2436 // at the beginning of a specific library and spans to the end
2437 // of the cache to be able to reach the shared LINKEDIT
2438 // segments. We need to convert the load command offsets to be
2439 // relative to the beginning of our specific image.
2440 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2441 lldb::offset_t linkedit_slide =
2442 linkedit_offset - m_linkedit_original_offset;
2443 symtab_load_command.symoff += linkedit_slide;
2444 symtab_load_command.stroff += linkedit_slide;
2445 dyld_info.export_off += linkedit_slide;
2446 dysymtab.indirectsymoff += linkedit_slide;
2447 function_starts_load_command.dataoff += linkedit_slide;
2448 exports_trie_load_command.dataoff += linkedit_slide;
2449 }
2450
2451 nlist_data.SetData(data: m_data, offset: symtab_load_command.symoff,
2452 length: nlist_data_byte_size);
2453 strtab_data.SetData(data: m_data, offset: symtab_load_command.stroff,
2454 length: strtab_data_byte_size);
2455
2456 // We shouldn't have exports data from both the LC_DYLD_INFO command
2457 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2458 lldbassert(!((dyld_info.export_size > 0)
2459 && (exports_trie_load_command.datasize > 0)));
2460 if (dyld_info.export_size > 0) {
2461 dyld_trie_data.SetData(data: m_data, offset: dyld_info.export_off,
2462 length: dyld_info.export_size);
2463 } else if (exports_trie_load_command.datasize > 0) {
2464 dyld_trie_data.SetData(data: m_data, offset: exports_trie_load_command.dataoff,
2465 length: exports_trie_load_command.datasize);
2466 }
2467
2468 if (dysymtab.nindirectsyms != 0) {
2469 indirect_symbol_index_data.SetData(data: m_data, offset: dysymtab.indirectsymoff,
2470 length: dysymtab.nindirectsyms * 4);
2471 }
2472 if (function_starts_load_command.cmd) {
2473 function_starts_data.SetData(data: m_data, offset: function_starts_load_command.dataoff,
2474 length: function_starts_load_command.datasize);
2475 }
2476 }
2477
2478 const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2479
2480 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2481 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2482
2483 // lldb works best if it knows the start address of all functions in a
2484 // module. Linker symbols or debug info are normally the best source of
2485 // information for start addr / size but they may be stripped in a released
2486 // binary. Two additional sources of information exist in Mach-O binaries:
2487 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2488 // function's start address in the
2489 // binary, relative to the text section.
2490 // eh_frame - the eh_frame FDEs have the start addr & size of
2491 // each function
2492 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2493 // all modern binaries.
2494 // Binaries built to run on older releases may need to use eh_frame
2495 // information.
2496
2497 if (text_section_sp && function_starts_data.GetByteSize()) {
2498 FunctionStarts::Entry function_start_entry;
2499 function_start_entry.data = false;
2500 lldb::offset_t function_start_offset = 0;
2501 function_start_entry.addr = text_section_sp->GetFileAddress();
2502 uint64_t delta;
2503 while ((delta = function_starts_data.GetULEB128(offset_ptr: &function_start_offset)) >
2504 0) {
2505 // Now append the current entry
2506 function_start_entry.addr += delta;
2507 if (is_arm) {
2508 if (function_start_entry.addr & 1) {
2509 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2510 function_start_entry.data = true;
2511 } else if (always_thumb) {
2512 function_start_entry.data = true;
2513 }
2514 }
2515 function_starts.Append(entry: function_start_entry);
2516 }
2517 } else {
2518 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2519 // load command claiming an eh_frame but it doesn't actually have the
2520 // eh_frame content. And if we have a dSYM, we don't need to do any of
2521 // this fill-in-the-missing-symbols work anyway - the debug info should
2522 // give us all the functions in the module.
2523 if (text_section_sp.get() && eh_frame_section_sp.get() &&
2524 m_type != eTypeDebugInfo) {
2525 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2526 DWARFCallFrameInfo::EH);
2527 DWARFCallFrameInfo::FunctionAddressAndSizeVector functions;
2528 eh_frame.GetFunctionAddressAndSizeVector(function_info&: functions);
2529 addr_t text_base_addr = text_section_sp->GetFileAddress();
2530 size_t count = functions.GetSize();
2531 for (size_t i = 0; i < count; ++i) {
2532 const DWARFCallFrameInfo::FunctionAddressAndSizeVector::Entry *func =
2533 functions.GetEntryAtIndex(i);
2534 if (func) {
2535 FunctionStarts::Entry function_start_entry;
2536 function_start_entry.addr = func->base - text_base_addr;
2537 if (is_arm) {
2538 if (function_start_entry.addr & 1) {
2539 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2540 function_start_entry.data = true;
2541 } else if (always_thumb) {
2542 function_start_entry.data = true;
2543 }
2544 }
2545 function_starts.Append(entry: function_start_entry);
2546 }
2547 }
2548 }
2549 }
2550
2551 const size_t function_starts_count = function_starts.GetSize();
2552
2553 // For user process binaries (executables, dylibs, frameworks, bundles), if
2554 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2555 // going to assume the binary has been stripped. Don't allow assembly
2556 // language instruction emulation because we don't know proper function
2557 // start boundaries.
2558 //
2559 // For all other types of binaries (kernels, stand-alone bare board
2560 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2561 // sections - we should not make any assumptions about them based on that.
2562 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2563 m_allow_assembly_emulation_unwind_plans = false;
2564 Log *unwind_or_symbol_log(GetLog(mask: LLDBLog::Symbols | LLDBLog::Unwind));
2565
2566 if (unwind_or_symbol_log)
2567 module_sp->LogMessage(
2568 log: unwind_or_symbol_log,
2569 format: "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2570 }
2571
2572 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2573 ? eh_frame_section_sp->GetID()
2574 : static_cast<user_id_t>(NO_SECT);
2575
2576 uint32_t N_SO_index = UINT32_MAX;
2577
2578 MachSymtabSectionInfo section_info(section_list);
2579 std::vector<uint32_t> N_FUN_indexes;
2580 std::vector<uint32_t> N_NSYM_indexes;
2581 std::vector<uint32_t> N_INCL_indexes;
2582 std::vector<uint32_t> N_BRAC_indexes;
2583 std::vector<uint32_t> N_COMM_indexes;
2584 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2585 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2586 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2587 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2588 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2589 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2590 // Any symbols that get merged into another will get an entry in this map
2591 // so we know which symbol index each merged nlist index ended up at
2592 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2593 uint32_t nlist_idx = 0;
2594 Symbol *symbol_ptr = nullptr;
2595
2596 uint32_t sym_idx = 0;
2597 Symbol *sym = nullptr;
2598 size_t num_syms = 0;
2599 std::string memory_symbol_name;
2600 uint32_t unmapped_local_symbols_found = 0;
2601
2602 std::vector<TrieEntryWithOffset> reexport_trie_entries;
2603 std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2604 std::set<lldb::addr_t> resolver_addresses;
2605
2606 const size_t dyld_trie_data_size = dyld_trie_data.GetByteSize();
2607 if (dyld_trie_data_size > 0) {
2608 LLDB_LOG(log, "Parsing {0} bytes of dyld trie data", dyld_trie_data_size);
2609 SectionSP text_segment_sp =
2610 GetSectionList()->FindSectionByName(section_dstr: GetSegmentNameTEXT());
2611 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2612 if (text_segment_sp)
2613 text_segment_file_addr = text_segment_sp->GetFileAddress();
2614 std::vector<llvm::StringRef> nameSlices;
2615 ParseTrieEntries(data&: dyld_trie_data, offset: 0, is_arm, text_seg_base_addr: text_segment_file_addr,
2616 nameSlices, resolver_addresses, reexports&: reexport_trie_entries,
2617 ext_symbols&: external_sym_trie_entries);
2618 }
2619
2620 typedef std::set<ConstString> IndirectSymbols;
2621 IndirectSymbols indirect_symbol_names;
2622
2623#if TARGET_OS_IPHONE
2624
2625 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2626 // optimized by moving LOCAL symbols out of the memory mapped portion of
2627 // the DSC. The symbol information has all been retained, but it isn't
2628 // available in the normal nlist data. However, there *are* duplicate
2629 // entries of *some*
2630 // LOCAL symbols in the normal nlist data. To handle this situation
2631 // correctly, we must first attempt
2632 // to parse any DSC unmapped symbol information. If we find any, we set a
2633 // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2634
2635 if (IsSharedCacheBinary()) {
2636 // Before we can start mapping the DSC, we need to make certain the
2637 // target process is actually using the cache we can find.
2638
2639 // Next we need to determine the correct path for the dyld shared cache.
2640
2641 ArchSpec header_arch = GetArchitecture();
2642
2643 UUID dsc_uuid;
2644 UUID process_shared_cache_uuid;
2645 addr_t process_shared_cache_base_addr;
2646
2647 if (process) {
2648 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2649 process_shared_cache_uuid);
2650 }
2651
2652 __block bool found_image = false;
2653 __block void *nlist_buffer = nullptr;
2654 __block unsigned nlist_count = 0;
2655 __block char *string_table = nullptr;
2656 __block vm_offset_t vm_nlist_memory = 0;
2657 __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
2658 __block vm_offset_t vm_string_memory = 0;
2659 __block mach_msg_type_number_t vm_string_bytes_read = 0;
2660
2661 auto _ = llvm::make_scope_exit(^{
2662 if (vm_nlist_memory)
2663 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
2664 if (vm_string_memory)
2665 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
2666 });
2667
2668 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2669 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2670 UndefinedNameToDescMap undefined_name_to_desc;
2671 SymbolIndexToName reexport_shlib_needs_fixup;
2672
2673 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
2674 uuid_t cache_uuid;
2675 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
2676 if (found_image)
2677 return;
2678
2679 if (process_shared_cache_uuid.IsValid() &&
2680 process_shared_cache_uuid != UUID(&cache_uuid, 16))
2681 return;
2682
2683 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
2684 uuid_t dsc_image_uuid;
2685 if (found_image)
2686 return;
2687
2688 dyld_image_copy_uuid(image, &dsc_image_uuid);
2689 if (image_uuid != UUID(dsc_image_uuid, 16))
2690 return;
2691
2692 found_image = true;
2693
2694 // Compute the size of the string table. We need to ask dyld for a
2695 // new SPI to avoid this step.
2696 dyld_image_local_nlist_content_4Symbolication(
2697 image, ^(const void *nlistStart, uint64_t nlistCount,
2698 const char *stringTable) {
2699 if (!nlistStart || !nlistCount)
2700 return;
2701
2702 // The buffers passed here are valid only inside the block.
2703 // Use vm_read to make a cheap copy of them available for our
2704 // processing later.
2705 kern_return_t ret =
2706 vm_read(mach_task_self(), (vm_address_t)nlistStart,
2707 nlist_byte_size * nlistCount, &vm_nlist_memory,
2708 &vm_nlist_bytes_read);
2709 if (ret != KERN_SUCCESS)
2710 return;
2711 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
2712
2713 // We don't know the size of the string table. It's cheaper
2714 // to map the whole VM region than to determine the size by
2715 // parsing all the nlist entries.
2716 vm_address_t string_address = (vm_address_t)stringTable;
2717 vm_size_t region_size;
2718 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
2719 vm_region_basic_info_data_t info;
2720 memory_object_name_t object;
2721 ret = vm_region_64(mach_task_self(), &string_address,
2722 &region_size, VM_REGION_BASIC_INFO_64,
2723 (vm_region_info_t)&info, &info_count, &object);
2724 if (ret != KERN_SUCCESS)
2725 return;
2726
2727 ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
2728 region_size -
2729 ((vm_address_t)stringTable - string_address),
2730 &vm_string_memory, &vm_string_bytes_read);
2731 if (ret != KERN_SUCCESS)
2732 return;
2733
2734 nlist_buffer = (void *)vm_nlist_memory;
2735 string_table = (char *)vm_string_memory;
2736 nlist_count = nlistCount;
2737 });
2738 });
2739 });
2740 if (nlist_buffer) {
2741 DataExtractor dsc_local_symbols_data(nlist_buffer,
2742 nlist_count * nlist_byte_size,
2743 byte_order, addr_byte_size);
2744 unmapped_local_symbols_found = nlist_count;
2745
2746 // The normal nlist code cannot correctly size the Symbols
2747 // array, we need to allocate it here.
2748 sym = symtab.Resize(
2749 symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2750 unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2751 num_syms = symtab.GetNumSymbols();
2752
2753 lldb::offset_t nlist_data_offset = 0;
2754
2755 for (uint32_t nlist_index = 0;
2756 nlist_index < nlist_count;
2757 nlist_index++) {
2758 /////////////////////////////
2759 {
2760 std::optional<struct nlist_64> nlist_maybe =
2761 ParseNList(dsc_local_symbols_data, nlist_data_offset,
2762 nlist_byte_size);
2763 if (!nlist_maybe)
2764 break;
2765 struct nlist_64 nlist = *nlist_maybe;
2766
2767 SymbolType type = eSymbolTypeInvalid;
2768 const char *symbol_name = string_table + nlist.n_strx;
2769
2770 if (symbol_name == NULL) {
2771 // No symbol should be NULL, even the symbols with no
2772 // string values should have an offset zero which
2773 // points to an empty C-string
2774 Debugger::ReportError(llvm::formatv(
2775 "DSC unmapped local symbol[{0}] has invalid "
2776 "string table offset {1:x} in {2}, ignoring symbol",
2777 nlist_index, nlist.n_strx,
2778 module_sp->GetFileSpec().GetPath()));
2779 continue;
2780 }
2781 if (symbol_name[0] == '\0')
2782 symbol_name = NULL;
2783
2784 const char *symbol_name_non_abi_mangled = NULL;
2785
2786 SectionSP symbol_section;
2787 uint32_t symbol_byte_size = 0;
2788 bool add_nlist = true;
2789 bool is_debug = ((nlist.n_type & N_STAB) != 0);
2790 bool demangled_is_synthesized = false;
2791 bool is_gsym = false;
2792 bool set_value = true;
2793
2794 assert(sym_idx < num_syms);
2795
2796 sym[sym_idx].SetDebug(is_debug);
2797
2798 if (is_debug) {
2799 switch (nlist.n_type) {
2800 case N_GSYM:
2801 // global symbol: name,,NO_SECT,type,0
2802 // Sometimes the N_GSYM value contains the address.
2803
2804 // FIXME: In the .o files, we have a GSYM and a debug
2805 // symbol for all the ObjC data. They
2806 // have the same address, but we want to ensure that
2807 // we always find only the real symbol, 'cause we
2808 // don't currently correctly attribute the
2809 // GSYM one to the ObjCClass/Ivar/MetaClass
2810 // symbol type. This is a temporary hack to make
2811 // sure the ObjectiveC symbols get treated correctly.
2812 // To do this right, we should coalesce all the GSYM
2813 // & global symbols that have the same address.
2814
2815 is_gsym = true;
2816 sym[sym_idx].SetExternal(true);
2817
2818 if (symbol_name && symbol_name[0] == '_' &&
2819 symbol_name[1] == 'O') {
2820 llvm::StringRef symbol_name_ref(symbol_name);
2821 if (symbol_name_ref.starts_with(
2822 g_objc_v2_prefix_class)) {
2823 symbol_name_non_abi_mangled = symbol_name + 1;
2824 symbol_name =
2825 symbol_name + g_objc_v2_prefix_class.size();
2826 type = eSymbolTypeObjCClass;
2827 demangled_is_synthesized = true;
2828
2829 } else if (symbol_name_ref.starts_with(
2830 g_objc_v2_prefix_metaclass)) {
2831 symbol_name_non_abi_mangled = symbol_name + 1;
2832 symbol_name =
2833 symbol_name + g_objc_v2_prefix_metaclass.size();
2834 type = eSymbolTypeObjCMetaClass;
2835 demangled_is_synthesized = true;
2836 } else if (symbol_name_ref.starts_with(
2837 g_objc_v2_prefix_ivar)) {
2838 symbol_name_non_abi_mangled = symbol_name + 1;
2839 symbol_name =
2840 symbol_name + g_objc_v2_prefix_ivar.size();
2841 type = eSymbolTypeObjCIVar;
2842 demangled_is_synthesized = true;
2843 }
2844 } else {
2845 if (nlist.n_value != 0)
2846 symbol_section = section_info.GetSection(
2847 nlist.n_sect, nlist.n_value);
2848 type = eSymbolTypeData;
2849 }
2850 break;
2851
2852 case N_FNAME:
2853 // procedure name (f77 kludge): name,,NO_SECT,0,0
2854 type = eSymbolTypeCompiler;
2855 break;
2856
2857 case N_FUN:
2858 // procedure: name,,n_sect,linenumber,address
2859 if (symbol_name) {
2860 type = eSymbolTypeCode;
2861 symbol_section = section_info.GetSection(
2862 nlist.n_sect, nlist.n_value);
2863
2864 N_FUN_addr_to_sym_idx.insert(
2865 std::make_pair(nlist.n_value, sym_idx));
2866 // We use the current number of symbols in the
2867 // symbol table in lieu of using nlist_idx in case
2868 // we ever start trimming entries out
2869 N_FUN_indexes.push_back(sym_idx);
2870 } else {
2871 type = eSymbolTypeCompiler;
2872
2873 if (!N_FUN_indexes.empty()) {
2874 // Copy the size of the function into the
2875 // original
2876 // STAB entry so we don't have
2877 // to hunt for it later
2878 symtab.SymbolAtIndex(N_FUN_indexes.back())
2879 ->SetByteSize(nlist.n_value);
2880 N_FUN_indexes.pop_back();
2881 // We don't really need the end function STAB as
2882 // it contains the size which we already placed
2883 // with the original symbol, so don't add it if
2884 // we want a minimal symbol table
2885 add_nlist = false;
2886 }
2887 }
2888 break;
2889
2890 case N_STSYM:
2891 // static symbol: name,,n_sect,type,address
2892 N_STSYM_addr_to_sym_idx.insert(
2893 std::make_pair(nlist.n_value, sym_idx));
2894 symbol_section = section_info.GetSection(nlist.n_sect,
2895 nlist.n_value);
2896 if (symbol_name && symbol_name[0]) {
2897 type = ObjectFile::GetSymbolTypeFromName(
2898 symbol_name + 1, eSymbolTypeData);
2899 }
2900 break;
2901
2902 case N_LCSYM:
2903 // .lcomm symbol: name,,n_sect,type,address
2904 symbol_section = section_info.GetSection(nlist.n_sect,
2905 nlist.n_value);
2906 type = eSymbolTypeCommonBlock;
2907 break;
2908
2909 case N_BNSYM:
2910 // We use the current number of symbols in the symbol
2911 // table in lieu of using nlist_idx in case we ever
2912 // start trimming entries out. Skip these if we want
2913 // minimal symbol tables
2914 add_nlist = false;
2915 break;
2916
2917 case N_ENSYM:
2918 // Set the size of the N_BNSYM to the terminating
2919 // index of this N_ENSYM so that we can always skip
2920 // the entire symbol if we need to navigate more
2921 // quickly at the source level when parsing STABS.
2922 // Skip these if we want minimal symbol tables
2923 add_nlist = false;
2924 break;
2925
2926 case N_OPT:
2927 // emitted with gcc2_compiled and in gcc source
2928 type = eSymbolTypeCompiler;
2929 break;
2930
2931 case N_RSYM:
2932 // register sym: name,,NO_SECT,type,register
2933 type = eSymbolTypeVariable;
2934 break;
2935
2936 case N_SLINE:
2937 // src line: 0,,n_sect,linenumber,address
2938 symbol_section = section_info.GetSection(nlist.n_sect,
2939 nlist.n_value);
2940 type = eSymbolTypeLineEntry;
2941 break;
2942
2943 case N_SSYM:
2944 // structure elt: name,,NO_SECT,type,struct_offset
2945 type = eSymbolTypeVariableType;
2946 break;
2947
2948 case N_SO:
2949 // source file name
2950 type = eSymbolTypeSourceFile;
2951 if (symbol_name == NULL) {
2952 add_nlist = false;
2953 if (N_SO_index != UINT32_MAX) {
2954 // Set the size of the N_SO to the terminating
2955 // index of this N_SO so that we can always skip
2956 // the entire N_SO if we need to navigate more
2957 // quickly at the source level when parsing STABS
2958 symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
2959 symbol_ptr->SetByteSize(sym_idx);
2960 symbol_ptr->SetSizeIsSibling(true);
2961 }
2962 N_NSYM_indexes.clear();
2963 N_INCL_indexes.clear();
2964 N_BRAC_indexes.clear();
2965 N_COMM_indexes.clear();
2966 N_FUN_indexes.clear();
2967 N_SO_index = UINT32_MAX;
2968 } else {
2969 // We use the current number of symbols in the
2970 // symbol table in lieu of using nlist_idx in case
2971 // we ever start trimming entries out
2972 const bool N_SO_has_full_path = symbol_name[0] == '/';
2973 if (N_SO_has_full_path) {
2974 if ((N_SO_index == sym_idx - 1) &&
2975 ((sym_idx - 1) < num_syms)) {
2976 // We have two consecutive N_SO entries where
2977 // the first contains a directory and the
2978 // second contains a full path.
2979 sym[sym_idx - 1].GetMangled().SetValue(
2980 ConstString(symbol_name));
2981 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
2982 add_nlist = false;
2983 } else {
2984 // This is the first entry in a N_SO that
2985 // contains a directory or
2986 // a full path to the source file
2987 N_SO_index = sym_idx;
2988 }
2989 } else if ((N_SO_index == sym_idx - 1) &&
2990 ((sym_idx - 1) < num_syms)) {
2991 // This is usually the second N_SO entry that
2992 // contains just the filename, so here we combine
2993 // it with the first one if we are minimizing the
2994 // symbol table
2995 const char *so_path = sym[sym_idx - 1]
2996 .GetMangled()
2997 .GetDemangledName()
2998 .AsCString();
2999 if (so_path && so_path[0]) {
3000 std::string full_so_path(so_path);
3001 const size_t double_slash_pos =
3002 full_so_path.find("//");
3003 if (double_slash_pos != std::string::npos) {
3004 // The linker has been generating bad N_SO
3005 // entries with doubled up paths
3006 // in the format "%s%s" where the first
3007 // string is the DW_AT_comp_dir, and the
3008 // second is the directory for the source
3009 // file so you end up with a path that looks
3010 // like "/tmp/src//tmp/src/"
3011 FileSpec so_dir(so_path);
3012 if (!FileSystem::Instance().Exists(so_dir)) {
3013 so_dir.SetFile(
3014 &full_so_path[double_slash_pos + 1],
3015 FileSpec::Style::native);
3016 if (FileSystem::Instance().Exists(so_dir)) {
3017 // Trim off the incorrect path
3018 full_so_path.erase(0, double_slash_pos + 1);
3019 }
3020 }
3021 }
3022 if (*full_so_path.rbegin() != '/')
3023 full_so_path += '/';
3024 full_so_path += symbol_name;
3025 sym[sym_idx - 1].GetMangled().SetValue(
3026 ConstString(full_so_path.c_str()));
3027 add_nlist = false;
3028 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3029 }
3030 } else {
3031 // This could be a relative path to a N_SO
3032 N_SO_index = sym_idx;
3033 }
3034 }
3035 break;
3036
3037 case N_OSO:
3038 // object file name: name,,0,0,st_mtime
3039 type = eSymbolTypeObjectFile;
3040 break;
3041
3042 case N_LSYM:
3043 // local sym: name,,NO_SECT,type,offset
3044 type = eSymbolTypeLocal;
3045 break;
3046
3047 // INCL scopes
3048 case N_BINCL:
3049 // include file beginning: name,,NO_SECT,0,sum. We use
3050 // the current number of symbols in the symbol table
3051 // in lieu of using nlist_idx in case we ever start
3052 // trimming entries out
3053 N_INCL_indexes.push_back(sym_idx);
3054 type = eSymbolTypeScopeBegin;
3055 break;
3056
3057 case N_EINCL:
3058 // include file end: name,,NO_SECT,0,0
3059 // Set the size of the N_BINCL to the terminating
3060 // index of this N_EINCL so that we can always skip
3061 // the entire symbol if we need to navigate more
3062 // quickly at the source level when parsing STABS
3063 if (!N_INCL_indexes.empty()) {
3064 symbol_ptr =
3065 symtab.SymbolAtIndex(N_INCL_indexes.back());
3066 symbol_ptr->SetByteSize(sym_idx + 1);
3067 symbol_ptr->SetSizeIsSibling(true);
3068 N_INCL_indexes.pop_back();
3069 }
3070 type = eSymbolTypeScopeEnd;
3071 break;
3072
3073 case N_SOL:
3074 // #included file name: name,,n_sect,0,address
3075 type = eSymbolTypeHeaderFile;
3076
3077 // We currently don't use the header files on darwin
3078 add_nlist = false;
3079 break;
3080
3081 case N_PARAMS:
3082 // compiler parameters: name,,NO_SECT,0,0
3083 type = eSymbolTypeCompiler;
3084 break;
3085
3086 case N_VERSION:
3087 // compiler version: name,,NO_SECT,0,0
3088 type = eSymbolTypeCompiler;
3089 break;
3090
3091 case N_OLEVEL:
3092 // compiler -O level: name,,NO_SECT,0,0
3093 type = eSymbolTypeCompiler;
3094 break;
3095
3096 case N_PSYM:
3097 // parameter: name,,NO_SECT,type,offset
3098 type = eSymbolTypeVariable;
3099 break;
3100
3101 case N_ENTRY:
3102 // alternate entry: name,,n_sect,linenumber,address
3103 symbol_section = section_info.GetSection(nlist.n_sect,
3104 nlist.n_value);
3105 type = eSymbolTypeLineEntry;
3106 break;
3107
3108 // Left and Right Braces
3109 case N_LBRAC:
3110 // left bracket: 0,,NO_SECT,nesting level,address. We
3111 // use the current number of symbols in the symbol
3112 // table in lieu of using nlist_idx in case we ever
3113 // start trimming entries out
3114 symbol_section = section_info.GetSection(nlist.n_sect,
3115 nlist.n_value);
3116 N_BRAC_indexes.push_back(sym_idx);
3117 type = eSymbolTypeScopeBegin;
3118 break;
3119
3120 case N_RBRAC:
3121 // right bracket: 0,,NO_SECT,nesting level,address
3122 // Set the size of the N_LBRAC to the terminating
3123 // index of this N_RBRAC so that we can always skip
3124 // the entire symbol if we need to navigate more
3125 // quickly at the source level when parsing STABS
3126 symbol_section = section_info.GetSection(nlist.n_sect,
3127 nlist.n_value);
3128 if (!N_BRAC_indexes.empty()) {
3129 symbol_ptr =
3130 symtab.SymbolAtIndex(N_BRAC_indexes.back());
3131 symbol_ptr->SetByteSize(sym_idx + 1);
3132 symbol_ptr->SetSizeIsSibling(true);
3133 N_BRAC_indexes.pop_back();
3134 }
3135 type = eSymbolTypeScopeEnd;
3136 break;
3137
3138 case N_EXCL:
3139 // deleted include file: name,,NO_SECT,0,sum
3140 type = eSymbolTypeHeaderFile;
3141 break;
3142
3143 // COMM scopes
3144 case N_BCOMM:
3145 // begin common: name,,NO_SECT,0,0
3146 // We use the current number of symbols in the symbol
3147 // table in lieu of using nlist_idx in case we ever
3148 // start trimming entries out
3149 type = eSymbolTypeScopeBegin;
3150 N_COMM_indexes.push_back(sym_idx);
3151 break;
3152
3153 case N_ECOML:
3154 // end common (local name): 0,,n_sect,0,address
3155 symbol_section = section_info.GetSection(nlist.n_sect,
3156 nlist.n_value);
3157 // Fall through
3158
3159 case N_ECOMM:
3160 // end common: name,,n_sect,0,0
3161 // Set the size of the N_BCOMM to the terminating
3162 // index of this N_ECOMM/N_ECOML so that we can
3163 // always skip the entire symbol if we need to
3164 // navigate more quickly at the source level when
3165 // parsing STABS
3166 if (!N_COMM_indexes.empty()) {
3167 symbol_ptr =
3168 symtab.SymbolAtIndex(N_COMM_indexes.back());
3169 symbol_ptr->SetByteSize(sym_idx + 1);
3170 symbol_ptr->SetSizeIsSibling(true);
3171 N_COMM_indexes.pop_back();
3172 }
3173 type = eSymbolTypeScopeEnd;
3174 break;
3175
3176 case N_LENG:
3177 // second stab entry with length information
3178 type = eSymbolTypeAdditional;
3179 break;
3180
3181 default:
3182 break;
3183 }
3184 } else {
3185 // uint8_t n_pext = N_PEXT & nlist.n_type;
3186 uint8_t n_type = N_TYPE & nlist.n_type;
3187 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3188
3189 switch (n_type) {
3190 case N_INDR: {
3191 const char *reexport_name_cstr =
3192 strtab_data.PeekCStr(nlist.n_value);
3193 if (reexport_name_cstr && reexport_name_cstr[0]) {
3194 type = eSymbolTypeReExported;
3195 ConstString reexport_name(
3196 reexport_name_cstr +
3197 ((reexport_name_cstr[0] == '_') ? 1 : 0));
3198 sym[sym_idx].SetReExportedSymbolName(reexport_name);
3199 set_value = false;
3200 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3201 indirect_symbol_names.insert(ConstString(
3202 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3203 } else
3204 type = eSymbolTypeUndefined;
3205 } break;
3206
3207 case N_UNDF:
3208 if (symbol_name && symbol_name[0]) {
3209 ConstString undefined_name(
3210 symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3211 undefined_name_to_desc[undefined_name] = nlist.n_desc;
3212 }
3213 // Fall through
3214 case N_PBUD:
3215 type = eSymbolTypeUndefined;
3216 break;
3217
3218 case N_ABS:
3219 type = eSymbolTypeAbsolute;
3220 break;
3221
3222 case N_SECT: {
3223 symbol_section = section_info.GetSection(nlist.n_sect,
3224 nlist.n_value);
3225
3226 if (symbol_section == NULL) {
3227 // TODO: warn about this?
3228 add_nlist = false;
3229 break;
3230 }
3231
3232 if (TEXT_eh_frame_sectID == nlist.n_sect) {
3233 type = eSymbolTypeException;
3234 } else {
3235 uint32_t section_type =
3236 symbol_section->Get() & SECTION_TYPE;
3237
3238 switch (section_type) {
3239 case S_CSTRING_LITERALS:
3240 type = eSymbolTypeData;
3241 break; // section with only literal C strings
3242 case S_4BYTE_LITERALS:
3243 type = eSymbolTypeData;
3244 break; // section with only 4 byte literals
3245 case S_8BYTE_LITERALS:
3246 type = eSymbolTypeData;
3247 break; // section with only 8 byte literals
3248 case S_LITERAL_POINTERS:
3249 type = eSymbolTypeTrampoline;
3250 break; // section with only pointers to literals
3251 case S_NON_LAZY_SYMBOL_POINTERS:
3252 type = eSymbolTypeTrampoline;
3253 break; // section with only non-lazy symbol
3254 // pointers
3255 case S_LAZY_SYMBOL_POINTERS:
3256 type = eSymbolTypeTrampoline;
3257 break; // section with only lazy symbol pointers
3258 case S_SYMBOL_STUBS:
3259 type = eSymbolTypeTrampoline;
3260 break; // section with only symbol stubs, byte
3261 // size of stub in the reserved2 field
3262 case S_MOD_INIT_FUNC_POINTERS:
3263 type = eSymbolTypeCode;
3264 break; // section with only function pointers for
3265 // initialization
3266 case S_MOD_TERM_FUNC_POINTERS:
3267 type = eSymbolTypeCode;
3268 break; // section with only function pointers for
3269 // termination
3270 case S_INTERPOSING:
3271 type = eSymbolTypeTrampoline;
3272 break; // section with only pairs of function
3273 // pointers for interposing
3274 case S_16BYTE_LITERALS:
3275 type = eSymbolTypeData;
3276 break; // section with only 16 byte literals
3277 case S_DTRACE_DOF:
3278 type = eSymbolTypeInstrumentation;
3279 break;
3280 case S_LAZY_DYLIB_SYMBOL_POINTERS:
3281 type = eSymbolTypeTrampoline;
3282 break;
3283 default:
3284 switch (symbol_section->GetType()) {
3285 case lldb::eSectionTypeCode:
3286 type = eSymbolTypeCode;
3287 break;
3288 case eSectionTypeData:
3289 case eSectionTypeDataCString: // Inlined C string
3290 // data
3291 case eSectionTypeDataCStringPointers: // Pointers
3292 // to C
3293 // string
3294 // data
3295 case eSectionTypeDataSymbolAddress: // Address of
3296 // a symbol in
3297 // the symbol
3298 // table
3299 case eSectionTypeData4:
3300 case eSectionTypeData8:
3301 case eSectionTypeData16:
3302 type = eSymbolTypeData;
3303 break;
3304 default:
3305 break;
3306 }
3307 break;
3308 }
3309
3310 if (type == eSymbolTypeInvalid) {
3311 const char *symbol_sect_name =
3312 symbol_section->GetName().AsCString();
3313 if (symbol_section->IsDescendant(
3314 text_section_sp.get())) {
3315 if (symbol_section->IsClear(
3316 S_ATTR_PURE_INSTRUCTIONS |
3317 S_ATTR_SELF_MODIFYING_CODE |
3318 S_ATTR_SOME_INSTRUCTIONS))
3319 type = eSymbolTypeData;
3320 else
3321 type = eSymbolTypeCode;
3322 } else if (symbol_section->IsDescendant(
3323 data_section_sp.get()) ||
3324 symbol_section->IsDescendant(
3325 data_dirty_section_sp.get()) ||
3326 symbol_section->IsDescendant(
3327 data_const_section_sp.get())) {
3328 if (symbol_sect_name &&
3329 ::strstr(symbol_sect_name, "__objc") ==
3330 symbol_sect_name) {
3331 type = eSymbolTypeRuntime;
3332
3333 if (symbol_name) {
3334 llvm::StringRef symbol_name_ref(symbol_name);
3335 if (symbol_name_ref.starts_with("_OBJC_")) {
3336 llvm::StringRef
3337 g_objc_v2_prefix_class(
3338 "_OBJC_CLASS_$_");
3339 llvm::StringRef
3340 g_objc_v2_prefix_metaclass(
3341 "_OBJC_METACLASS_$_");
3342 llvm::StringRef
3343 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3344 if (symbol_name_ref.starts_with(
3345 g_objc_v2_prefix_class)) {
3346 symbol_name_non_abi_mangled =
3347 symbol_name + 1;
3348 symbol_name =
3349 symbol_name +
3350 g_objc_v2_prefix_class.size();
3351 type = eSymbolTypeObjCClass;
3352 demangled_is_synthesized = true;
3353 } else if (
3354 symbol_name_ref.starts_with(
3355 g_objc_v2_prefix_metaclass)) {
3356 symbol_name_non_abi_mangled =
3357 symbol_name + 1;
3358 symbol_name =
3359 symbol_name +
3360 g_objc_v2_prefix_metaclass.size();
3361 type = eSymbolTypeObjCMetaClass;
3362 demangled_is_synthesized = true;
3363 } else if (symbol_name_ref.starts_with(
3364 g_objc_v2_prefix_ivar)) {
3365 symbol_name_non_abi_mangled =
3366 symbol_name + 1;
3367 symbol_name =
3368 symbol_name +
3369 g_objc_v2_prefix_ivar.size();
3370 type = eSymbolTypeObjCIVar;
3371 demangled_is_synthesized = true;
3372 }
3373 }
3374 }
3375 } else if (symbol_sect_name &&
3376 ::strstr(symbol_sect_name,
3377 "__gcc_except_tab") ==
3378 symbol_sect_name) {
3379 type = eSymbolTypeException;
3380 } else {
3381 type = eSymbolTypeData;
3382 }
3383 } else if (symbol_sect_name &&
3384 ::strstr(symbol_sect_name, "__IMPORT") ==
3385 symbol_sect_name) {
3386 type = eSymbolTypeTrampoline;
3387 } else if (symbol_section->IsDescendant(
3388 objc_section_sp.get())) {
3389 type = eSymbolTypeRuntime;
3390 if (symbol_name && symbol_name[0] == '.') {
3391 llvm::StringRef symbol_name_ref(symbol_name);
3392 llvm::StringRef
3393 g_objc_v1_prefix_class(".objc_class_name_");
3394 if (symbol_name_ref.starts_with(
3395 g_objc_v1_prefix_class)) {
3396 symbol_name_non_abi_mangled = symbol_name;
3397 symbol_name = symbol_name +
3398 g_objc_v1_prefix_class.size();
3399 type = eSymbolTypeObjCClass;
3400 demangled_is_synthesized = true;
3401 }
3402 }
3403 }
3404 }
3405 }
3406 } break;
3407 }
3408 }
3409
3410 if (add_nlist) {
3411 uint64_t symbol_value = nlist.n_value;
3412 if (symbol_name_non_abi_mangled) {
3413 sym[sym_idx].GetMangled().SetMangledName(
3414 ConstString(symbol_name_non_abi_mangled));
3415 sym[sym_idx].GetMangled().SetDemangledName(
3416 ConstString(symbol_name));
3417 } else {
3418 if (symbol_name && symbol_name[0] == '_') {
3419 symbol_name++; // Skip the leading underscore
3420 }
3421
3422 if (symbol_name) {
3423 ConstString const_symbol_name(symbol_name);
3424 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
3425 if (is_gsym && is_debug) {
3426 const char *gsym_name =
3427 sym[sym_idx]
3428 .GetMangled()
3429 .GetName(Mangled::ePreferMangled)
3430 .GetCString();
3431 if (gsym_name)
3432 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3433 }
3434 }
3435 }
3436 if (symbol_section) {
3437 const addr_t section_file_addr =
3438 symbol_section->GetFileAddress();
3439 if (symbol_byte_size == 0 &&
3440 function_starts_count > 0) {
3441 addr_t symbol_lookup_file_addr = nlist.n_value;
3442 // Do an exact address match for non-ARM addresses,
3443 // else get the closest since the symbol might be a
3444 // thumb symbol which has an address with bit zero
3445 // set
3446 FunctionStarts::Entry *func_start_entry =
3447 function_starts.FindEntry(symbol_lookup_file_addr,
3448 !is_arm);
3449 if (is_arm && func_start_entry) {
3450 // Verify that the function start address is the
3451 // symbol address (ARM) or the symbol address + 1
3452 // (thumb)
3453 if (func_start_entry->addr !=
3454 symbol_lookup_file_addr &&
3455 func_start_entry->addr !=
3456 (symbol_lookup_file_addr + 1)) {
3457 // Not the right entry, NULL it out...
3458 func_start_entry = NULL;
3459 }
3460 }
3461 if (func_start_entry) {
3462 func_start_entry->data = true;
3463
3464 addr_t symbol_file_addr = func_start_entry->addr;
3465 uint32_t symbol_flags = 0;
3466 if (is_arm) {
3467 if (symbol_file_addr & 1)
3468 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3469 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3470 }
3471
3472 const FunctionStarts::Entry *next_func_start_entry =
3473 function_starts.FindNextEntry(func_start_entry);
3474 const addr_t section_end_file_addr =
3475 section_file_addr +
3476 symbol_section->GetByteSize();
3477 if (next_func_start_entry) {
3478 addr_t next_symbol_file_addr =
3479 next_func_start_entry->addr;
3480 // Be sure to clear the Thumb address bit when
3481 // we calculate the size from the current and
3482 // next address
3483 if (is_arm)
3484 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3485 symbol_byte_size = std::min<lldb::addr_t>(
3486 next_symbol_file_addr - symbol_file_addr,
3487 section_end_file_addr - symbol_file_addr);
3488 } else {
3489 symbol_byte_size =
3490 section_end_file_addr - symbol_file_addr;
3491 }
3492 }
3493 }
3494 symbol_value -= section_file_addr;
3495 }
3496
3497 if (is_debug == false) {
3498 if (type == eSymbolTypeCode) {
3499 // See if we can find a N_FUN entry for any code
3500 // symbols. If we do find a match, and the name
3501 // matches, then we can merge the two into just the
3502 // function symbol to avoid duplicate entries in
3503 // the symbol table
3504 auto range =
3505 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3506 if (range.first != range.second) {
3507 bool found_it = false;
3508 for (auto pos = range.first; pos != range.second;
3509 ++pos) {
3510 if (sym[sym_idx].GetMangled().GetName(
3511 Mangled::ePreferMangled) ==
3512 sym[pos->second].GetMangled().GetName(
3513 Mangled::ePreferMangled)) {
3514 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3515 // We just need the flags from the linker
3516 // symbol, so put these flags
3517 // into the N_FUN flags to avoid duplicate
3518 // symbols in the symbol table
3519 sym[pos->second].SetExternal(
3520 sym[sym_idx].IsExternal());
3521 sym[pos->second].SetFlags(nlist.n_type << 16 |
3522 nlist.n_desc);
3523 if (resolver_addresses.find(nlist.n_value) !=
3524 resolver_addresses.end())
3525 sym[pos->second].SetType(eSymbolTypeResolver);
3526 sym[sym_idx].Clear();
3527 found_it = true;
3528 break;
3529 }
3530 }
3531 if (found_it)
3532 continue;
3533 } else {
3534 if (resolver_addresses.find(nlist.n_value) !=
3535 resolver_addresses.end())
3536 type = eSymbolTypeResolver;
3537 }
3538 } else if (type == eSymbolTypeData ||
3539 type == eSymbolTypeObjCClass ||
3540 type == eSymbolTypeObjCMetaClass ||
3541 type == eSymbolTypeObjCIVar) {
3542 // See if we can find a N_STSYM entry for any data
3543 // symbols. If we do find a match, and the name
3544 // matches, then we can merge the two into just the
3545 // Static symbol to avoid duplicate entries in the
3546 // symbol table
3547 auto range = N_STSYM_addr_to_sym_idx.equal_range(
3548 nlist.n_value);
3549 if (range.first != range.second) {
3550 bool found_it = false;
3551 for (auto pos = range.first; pos != range.second;
3552 ++pos) {
3553 if (sym[sym_idx].GetMangled().GetName(
3554 Mangled::ePreferMangled) ==
3555 sym[pos->second].GetMangled().GetName(
3556 Mangled::ePreferMangled)) {
3557 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3558 // We just need the flags from the linker
3559 // symbol, so put these flags
3560 // into the N_STSYM flags to avoid duplicate
3561 // symbols in the symbol table
3562 sym[pos->second].SetExternal(
3563 sym[sym_idx].IsExternal());
3564 sym[pos->second].SetFlags(nlist.n_type << 16 |
3565 nlist.n_desc);
3566 sym[sym_idx].Clear();
3567 found_it = true;
3568 break;
3569 }
3570 }
3571 if (found_it)
3572 continue;
3573 } else {
3574 const char *gsym_name =
3575 sym[sym_idx]
3576 .GetMangled()
3577 .GetName(Mangled::ePreferMangled)
3578 .GetCString();
3579 if (gsym_name) {
3580 // Combine N_GSYM stab entries with the non
3581 // stab symbol
3582 ConstNameToSymbolIndexMap::const_iterator pos =
3583 N_GSYM_name_to_sym_idx.find(gsym_name);
3584 if (pos != N_GSYM_name_to_sym_idx.end()) {
3585 const uint32_t GSYM_sym_idx = pos->second;
3586 m_nlist_idx_to_sym_idx[nlist_idx] =
3587 GSYM_sym_idx;
3588 // Copy the address, because often the N_GSYM
3589 // address has an invalid address of zero
3590 // when the global is a common symbol
3591 sym[GSYM_sym_idx].GetAddressRef().SetSection(
3592 symbol_section);
3593 sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3594 symbol_value);
3595 add_symbol_addr(sym[GSYM_sym_idx]
3596 .GetAddress()
3597 .GetFileAddress());
3598 // We just need the flags from the linker
3599 // symbol, so put these flags
3600 // into the N_GSYM flags to avoid duplicate
3601 // symbols in the symbol table
3602 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3603 nlist.n_desc);
3604 sym[sym_idx].Clear();
3605 continue;
3606 }
3607 }
3608 }
3609 }
3610 }
3611
3612 sym[sym_idx].SetID(nlist_idx);
3613 sym[sym_idx].SetType(type);
3614 if (set_value) {
3615 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3616 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3617 add_symbol_addr(
3618 sym[sym_idx].GetAddress().GetFileAddress());
3619 }
3620 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3621
3622 if (symbol_byte_size > 0)
3623 sym[sym_idx].SetByteSize(symbol_byte_size);
3624
3625 if (demangled_is_synthesized)
3626 sym[sym_idx].SetDemangledNameIsSynthesized(true);
3627 ++sym_idx;
3628 } else {
3629 sym[sym_idx].Clear();
3630 }
3631 }
3632 /////////////////////////////
3633 }
3634 }
3635
3636 for (const auto &pos : reexport_shlib_needs_fixup) {
3637 const auto undef_pos = undefined_name_to_desc.find(pos.second);
3638 if (undef_pos != undefined_name_to_desc.end()) {
3639 const uint8_t dylib_ordinal =
3640 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3641 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3642 sym[pos.first].SetReExportedSymbolSharedLibrary(
3643 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3644 }
3645 }
3646 }
3647
3648#endif
3649 lldb::offset_t nlist_data_offset = 0;
3650
3651 if (nlist_data.GetByteSize() > 0) {
3652
3653 // If the sym array was not created while parsing the DSC unmapped
3654 // symbols, create it now.
3655 if (sym == nullptr) {
3656 sym =
3657 symtab.Resize(count: symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3658 num_syms = symtab.GetNumSymbols();
3659 }
3660
3661 if (unmapped_local_symbols_found) {
3662 assert(m_dysymtab.ilocalsym == 0);
3663 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3664 nlist_idx = m_dysymtab.nlocalsym;
3665 } else {
3666 nlist_idx = 0;
3667 }
3668
3669 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3670 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3671 UndefinedNameToDescMap undefined_name_to_desc;
3672 SymbolIndexToName reexport_shlib_needs_fixup;
3673
3674 // Symtab parsing is a huge mess. Everything is entangled and the code
3675 // requires access to a ridiculous amount of variables. LLDB depends
3676 // heavily on the proper merging of symbols and to get that right we need
3677 // to make sure we have parsed all the debug symbols first. Therefore we
3678 // invoke the lambda twice, once to parse only the debug symbols and then
3679 // once more to parse the remaining symbols.
3680 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3681 bool debug_only) {
3682 const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3683 if (is_debug != debug_only)
3684 return true;
3685
3686 const char *symbol_name_non_abi_mangled = nullptr;
3687 const char *symbol_name = nullptr;
3688
3689 if (have_strtab_data) {
3690 symbol_name = strtab_data.PeekCStr(offset: nlist.n_strx);
3691
3692 if (symbol_name == nullptr) {
3693 // No symbol should be NULL, even the symbols with no string values
3694 // should have an offset zero which points to an empty C-string
3695 Debugger::ReportError(message: llvm::formatv(
3696 Fmt: "symbol[{0}] has invalid string table offset {1:x} in {2}, "
3697 "ignoring symbol",
3698 Vals&: nlist_idx, Vals&: nlist.n_strx, Vals: module_sp->GetFileSpec().GetPath()));
3699 return true;
3700 }
3701 if (symbol_name[0] == '\0')
3702 symbol_name = nullptr;
3703 } else {
3704 const addr_t str_addr = strtab_addr + nlist.n_strx;
3705 Status str_error;
3706 if (process->ReadCStringFromMemory(vm_addr: str_addr, out_str&: memory_symbol_name,
3707 error&: str_error))
3708 symbol_name = memory_symbol_name.c_str();
3709 }
3710
3711 SymbolType type = eSymbolTypeInvalid;
3712 SectionSP symbol_section;
3713 lldb::addr_t symbol_byte_size = 0;
3714 bool add_nlist = true;
3715 bool is_gsym = false;
3716 bool demangled_is_synthesized = false;
3717 bool set_value = true;
3718
3719 assert(sym_idx < num_syms);
3720 sym[sym_idx].SetDebug(is_debug);
3721
3722 if (is_debug) {
3723 switch (nlist.n_type) {
3724 case N_GSYM:
3725 // global symbol: name,,NO_SECT,type,0
3726 // Sometimes the N_GSYM value contains the address.
3727
3728 // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3729 // the ObjC data. They
3730 // have the same address, but we want to ensure that we always find
3731 // only the real symbol, 'cause we don't currently correctly
3732 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3733 // type. This is a temporary hack to make sure the ObjectiveC
3734 // symbols get treated correctly. To do this right, we should
3735 // coalesce all the GSYM & global symbols that have the same
3736 // address.
3737 is_gsym = true;
3738 sym[sym_idx].SetExternal(true);
3739
3740 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3741 llvm::StringRef symbol_name_ref(symbol_name);
3742 if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_class)) {
3743 symbol_name_non_abi_mangled = symbol_name + 1;
3744 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3745 type = eSymbolTypeObjCClass;
3746 demangled_is_synthesized = true;
3747
3748 } else if (symbol_name_ref.starts_with(
3749 Prefix: g_objc_v2_prefix_metaclass)) {
3750 symbol_name_non_abi_mangled = symbol_name + 1;
3751 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3752 type = eSymbolTypeObjCMetaClass;
3753 demangled_is_synthesized = true;
3754 } else if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_ivar)) {
3755 symbol_name_non_abi_mangled = symbol_name + 1;
3756 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3757 type = eSymbolTypeObjCIVar;
3758 demangled_is_synthesized = true;
3759 }
3760 } else {
3761 if (nlist.n_value != 0)
3762 symbol_section =
3763 section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3764 type = eSymbolTypeData;
3765 }
3766 break;
3767
3768 case N_FNAME:
3769 // procedure name (f77 kludge): name,,NO_SECT,0,0
3770 type = eSymbolTypeCompiler;
3771 break;
3772
3773 case N_FUN:
3774 // procedure: name,,n_sect,linenumber,address
3775 if (symbol_name) {
3776 type = eSymbolTypeCode;
3777 symbol_section =
3778 section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3779
3780 N_FUN_addr_to_sym_idx.insert(
3781 x: std::make_pair(x&: nlist.n_value, y&: sym_idx));
3782 // We use the current number of symbols in the symbol table in
3783 // lieu of using nlist_idx in case we ever start trimming entries
3784 // out
3785 N_FUN_indexes.push_back(x: sym_idx);
3786 } else {
3787 type = eSymbolTypeCompiler;
3788
3789 if (!N_FUN_indexes.empty()) {
3790 // Copy the size of the function into the original STAB entry
3791 // so we don't have to hunt for it later
3792 symtab.SymbolAtIndex(idx: N_FUN_indexes.back())
3793 ->SetByteSize(nlist.n_value);
3794 N_FUN_indexes.pop_back();
3795 // We don't really need the end function STAB as it contains
3796 // the size which we already placed with the original symbol,
3797 // so don't add it if we want a minimal symbol table
3798 add_nlist = false;
3799 }
3800 }
3801 break;
3802
3803 case N_STSYM:
3804 // static symbol: name,,n_sect,type,address
3805 N_STSYM_addr_to_sym_idx.insert(
3806 x: std::make_pair(x&: nlist.n_value, y&: sym_idx));
3807 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3808 if (symbol_name && symbol_name[0]) {
3809 type = ObjectFile::GetSymbolTypeFromName(name: symbol_name + 1,
3810 symbol_type_hint: eSymbolTypeData);
3811 }
3812 break;
3813
3814 case N_LCSYM:
3815 // .lcomm symbol: name,,n_sect,type,address
3816 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3817 type = eSymbolTypeCommonBlock;
3818 break;
3819
3820 case N_BNSYM:
3821 // We use the current number of symbols in the symbol table in lieu
3822 // of using nlist_idx in case we ever start trimming entries out
3823 // Skip these if we want minimal symbol tables
3824 add_nlist = false;
3825 break;
3826
3827 case N_ENSYM:
3828 // Set the size of the N_BNSYM to the terminating index of this
3829 // N_ENSYM so that we can always skip the entire symbol if we need
3830 // to navigate more quickly at the source level when parsing STABS
3831 // Skip these if we want minimal symbol tables
3832 add_nlist = false;
3833 break;
3834
3835 case N_OPT:
3836 // emitted with gcc2_compiled and in gcc source
3837 type = eSymbolTypeCompiler;
3838 break;
3839
3840 case N_RSYM:
3841 // register sym: name,,NO_SECT,type,register
3842 type = eSymbolTypeVariable;
3843 break;
3844
3845 case N_SLINE:
3846 // src line: 0,,n_sect,linenumber,address
3847 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3848 type = eSymbolTypeLineEntry;
3849 break;
3850
3851 case N_SSYM:
3852 // structure elt: name,,NO_SECT,type,struct_offset
3853 type = eSymbolTypeVariableType;
3854 break;
3855
3856 case N_SO:
3857 // source file name
3858 type = eSymbolTypeSourceFile;
3859 if (symbol_name == nullptr) {
3860 add_nlist = false;
3861 if (N_SO_index != UINT32_MAX) {
3862 // Set the size of the N_SO to the terminating index of this
3863 // N_SO so that we can always skip the entire N_SO if we need
3864 // to navigate more quickly at the source level when parsing
3865 // STABS
3866 symbol_ptr = symtab.SymbolAtIndex(idx: N_SO_index);
3867 symbol_ptr->SetByteSize(sym_idx);
3868 symbol_ptr->SetSizeIsSibling(true);
3869 }
3870 N_NSYM_indexes.clear();
3871 N_INCL_indexes.clear();
3872 N_BRAC_indexes.clear();
3873 N_COMM_indexes.clear();
3874 N_FUN_indexes.clear();
3875 N_SO_index = UINT32_MAX;
3876 } else {
3877 // We use the current number of symbols in the symbol table in
3878 // lieu of using nlist_idx in case we ever start trimming entries
3879 // out
3880 const bool N_SO_has_full_path = symbol_name[0] == '/';
3881 if (N_SO_has_full_path) {
3882 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
3883 // We have two consecutive N_SO entries where the first
3884 // contains a directory and the second contains a full path.
3885 sym[sym_idx - 1].GetMangled().SetValue(
3886 ConstString(symbol_name));
3887 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3888 add_nlist = false;
3889 } else {
3890 // This is the first entry in a N_SO that contains a
3891 // directory or a full path to the source file
3892 N_SO_index = sym_idx;
3893 }
3894 } else if ((N_SO_index == sym_idx - 1) &&
3895 ((sym_idx - 1) < num_syms)) {
3896 // This is usually the second N_SO entry that contains just the
3897 // filename, so here we combine it with the first one if we are
3898 // minimizing the symbol table
3899 const char *so_path =
3900 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
3901 if (so_path && so_path[0]) {
3902 std::string full_so_path(so_path);
3903 const size_t double_slash_pos = full_so_path.find(s: "//");
3904 if (double_slash_pos != std::string::npos) {
3905 // The linker has been generating bad N_SO entries with
3906 // doubled up paths in the format "%s%s" where the first
3907 // string is the DW_AT_comp_dir, and the second is the
3908 // directory for the source file so you end up with a path
3909 // that looks like "/tmp/src//tmp/src/"
3910 FileSpec so_dir(so_path);
3911 if (!FileSystem::Instance().Exists(file_spec: so_dir)) {
3912 so_dir.SetFile(path: &full_so_path[double_slash_pos + 1],
3913 style: FileSpec::Style::native);
3914 if (FileSystem::Instance().Exists(file_spec: so_dir)) {
3915 // Trim off the incorrect path
3916 full_so_path.erase(pos: 0, n: double_slash_pos + 1);
3917 }
3918 }
3919 }
3920 if (*full_so_path.rbegin() != '/')
3921 full_so_path += '/';
3922 full_so_path += symbol_name;
3923 sym[sym_idx - 1].GetMangled().SetValue(
3924 ConstString(full_so_path.c_str()));
3925 add_nlist = false;
3926 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3927 }
3928 } else {
3929 // This could be a relative path to a N_SO
3930 N_SO_index = sym_idx;
3931 }
3932 }
3933 break;
3934
3935 case N_OSO:
3936 // object file name: name,,0,0,st_mtime
3937 type = eSymbolTypeObjectFile;
3938 break;
3939
3940 case N_LSYM:
3941 // local sym: name,,NO_SECT,type,offset
3942 type = eSymbolTypeLocal;
3943 break;
3944
3945 // INCL scopes
3946 case N_BINCL:
3947 // include file beginning: name,,NO_SECT,0,sum We use the current
3948 // number of symbols in the symbol table in lieu of using nlist_idx
3949 // in case we ever start trimming entries out
3950 N_INCL_indexes.push_back(x: sym_idx);
3951 type = eSymbolTypeScopeBegin;
3952 break;
3953
3954 case N_EINCL:
3955 // include file end: name,,NO_SECT,0,0
3956 // Set the size of the N_BINCL to the terminating index of this
3957 // N_EINCL so that we can always skip the entire symbol if we need
3958 // to navigate more quickly at the source level when parsing STABS
3959 if (!N_INCL_indexes.empty()) {
3960 symbol_ptr = symtab.SymbolAtIndex(idx: N_INCL_indexes.back());
3961 symbol_ptr->SetByteSize(sym_idx + 1);
3962 symbol_ptr->SetSizeIsSibling(true);
3963 N_INCL_indexes.pop_back();
3964 }
3965 type = eSymbolTypeScopeEnd;
3966 break;
3967
3968 case N_SOL:
3969 // #included file name: name,,n_sect,0,address
3970 type = eSymbolTypeHeaderFile;
3971
3972 // We currently don't use the header files on darwin
3973 add_nlist = false;
3974 break;
3975
3976 case N_PARAMS:
3977 // compiler parameters: name,,NO_SECT,0,0
3978 type = eSymbolTypeCompiler;
3979 break;
3980
3981 case N_VERSION:
3982 // compiler version: name,,NO_SECT,0,0
3983 type = eSymbolTypeCompiler;
3984 break;
3985
3986 case N_OLEVEL:
3987 // compiler -O level: name,,NO_SECT,0,0
3988 type = eSymbolTypeCompiler;
3989 break;
3990
3991 case N_PSYM:
3992 // parameter: name,,NO_SECT,type,offset
3993 type = eSymbolTypeVariable;
3994 break;
3995
3996 case N_ENTRY:
3997 // alternate entry: name,,n_sect,linenumber,address
3998 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3999 type = eSymbolTypeLineEntry;
4000 break;
4001
4002 // Left and Right Braces
4003 case N_LBRAC:
4004 // left bracket: 0,,NO_SECT,nesting level,address We use the
4005 // current number of symbols in the symbol table in lieu of using
4006 // nlist_idx in case we ever start trimming entries out
4007 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4008 N_BRAC_indexes.push_back(x: sym_idx);
4009 type = eSymbolTypeScopeBegin;
4010 break;
4011
4012 case N_RBRAC:
4013 // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4014 // the N_LBRAC to the terminating index of this N_RBRAC so that we
4015 // can always skip the entire symbol if we need to navigate more
4016 // quickly at the source level when parsing STABS
4017 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4018 if (!N_BRAC_indexes.empty()) {
4019 symbol_ptr = symtab.SymbolAtIndex(idx: N_BRAC_indexes.back());
4020 symbol_ptr->SetByteSize(sym_idx + 1);
4021 symbol_ptr->SetSizeIsSibling(true);
4022 N_BRAC_indexes.pop_back();
4023 }
4024 type = eSymbolTypeScopeEnd;
4025 break;
4026
4027 case N_EXCL:
4028 // deleted include file: name,,NO_SECT,0,sum
4029 type = eSymbolTypeHeaderFile;
4030 break;
4031
4032 // COMM scopes
4033 case N_BCOMM:
4034 // begin common: name,,NO_SECT,0,0
4035 // We use the current number of symbols in the symbol table in lieu
4036 // of using nlist_idx in case we ever start trimming entries out
4037 type = eSymbolTypeScopeBegin;
4038 N_COMM_indexes.push_back(x: sym_idx);
4039 break;
4040
4041 case N_ECOML:
4042 // end common (local name): 0,,n_sect,0,address
4043 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4044 [[fallthrough]];
4045
4046 case N_ECOMM:
4047 // end common: name,,n_sect,0,0
4048 // Set the size of the N_BCOMM to the terminating index of this
4049 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4050 // we need to navigate more quickly at the source level when
4051 // parsing STABS
4052 if (!N_COMM_indexes.empty()) {
4053 symbol_ptr = symtab.SymbolAtIndex(idx: N_COMM_indexes.back());
4054 symbol_ptr->SetByteSize(sym_idx + 1);
4055 symbol_ptr->SetSizeIsSibling(true);
4056 N_COMM_indexes.pop_back();
4057 }
4058 type = eSymbolTypeScopeEnd;
4059 break;
4060
4061 case N_LENG:
4062 // second stab entry with length information
4063 type = eSymbolTypeAdditional;
4064 break;
4065
4066 default:
4067 break;
4068 }
4069 } else {
4070 uint8_t n_type = N_TYPE & nlist.n_type;
4071 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4072
4073 switch (n_type) {
4074 case N_INDR: {
4075 const char *reexport_name_cstr = strtab_data.PeekCStr(offset: nlist.n_value);
4076 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) {
4077 type = eSymbolTypeReExported;
4078 ConstString reexport_name(reexport_name_cstr +
4079 ((reexport_name_cstr[0] == '_') ? 1 : 0));
4080 sym[sym_idx].SetReExportedSymbolName(reexport_name);
4081 set_value = false;
4082 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4083 indirect_symbol_names.insert(
4084 x: ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4085 } else
4086 type = eSymbolTypeUndefined;
4087 } break;
4088
4089 case N_UNDF:
4090 if (symbol_name && symbol_name[0]) {
4091 ConstString undefined_name(symbol_name +
4092 ((symbol_name[0] == '_') ? 1 : 0));
4093 undefined_name_to_desc[undefined_name] = nlist.n_desc;
4094 }
4095 [[fallthrough]];
4096
4097 case N_PBUD:
4098 type = eSymbolTypeUndefined;
4099 break;
4100
4101 case N_ABS:
4102 type = eSymbolTypeAbsolute;
4103 break;
4104
4105 case N_SECT: {
4106 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4107
4108 if (!symbol_section) {
4109 // TODO: warn about this?
4110 add_nlist = false;
4111 break;
4112 }
4113
4114 if (TEXT_eh_frame_sectID == nlist.n_sect) {
4115 type = eSymbolTypeException;
4116 } else {
4117 uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4118
4119 switch (section_type) {
4120 case S_CSTRING_LITERALS:
4121 type = eSymbolTypeData;
4122 break; // section with only literal C strings
4123 case S_4BYTE_LITERALS:
4124 type = eSymbolTypeData;
4125 break; // section with only 4 byte literals
4126 case S_8BYTE_LITERALS:
4127 type = eSymbolTypeData;
4128 break; // section with only 8 byte literals
4129 case S_LITERAL_POINTERS:
4130 type = eSymbolTypeTrampoline;
4131 break; // section with only pointers to literals
4132 case S_NON_LAZY_SYMBOL_POINTERS:
4133 type = eSymbolTypeTrampoline;
4134 break; // section with only non-lazy symbol pointers
4135 case S_LAZY_SYMBOL_POINTERS:
4136 type = eSymbolTypeTrampoline;
4137 break; // section with only lazy symbol pointers
4138 case S_SYMBOL_STUBS:
4139 type = eSymbolTypeTrampoline;
4140 break; // section with only symbol stubs, byte size of stub in
4141 // the reserved2 field
4142 case S_MOD_INIT_FUNC_POINTERS:
4143 type = eSymbolTypeCode;
4144 break; // section with only function pointers for initialization
4145 case S_MOD_TERM_FUNC_POINTERS:
4146 type = eSymbolTypeCode;
4147 break; // section with only function pointers for termination
4148 case S_INTERPOSING:
4149 type = eSymbolTypeTrampoline;
4150 break; // section with only pairs of function pointers for
4151 // interposing
4152 case S_16BYTE_LITERALS:
4153 type = eSymbolTypeData;
4154 break; // section with only 16 byte literals
4155 case S_DTRACE_DOF:
4156 type = eSymbolTypeInstrumentation;
4157 break;
4158 case S_LAZY_DYLIB_SYMBOL_POINTERS:
4159 type = eSymbolTypeTrampoline;
4160 break;
4161 default:
4162 switch (symbol_section->GetType()) {
4163 case lldb::eSectionTypeCode:
4164 type = eSymbolTypeCode;
4165 break;
4166 case eSectionTypeData:
4167 case eSectionTypeDataCString: // Inlined C string data
4168 case eSectionTypeDataCStringPointers: // Pointers to C string
4169 // data
4170 case eSectionTypeDataSymbolAddress: // Address of a symbol in
4171 // the symbol table
4172 case eSectionTypeData4:
4173 case eSectionTypeData8:
4174 case eSectionTypeData16:
4175 type = eSymbolTypeData;
4176 break;
4177 default:
4178 break;
4179 }
4180 break;
4181 }
4182
4183 if (type == eSymbolTypeInvalid) {
4184 const char *symbol_sect_name =
4185 symbol_section->GetName().AsCString();
4186 if (symbol_section->IsDescendant(section: text_section_sp.get())) {
4187 if (symbol_section->IsClear(bit: S_ATTR_PURE_INSTRUCTIONS |
4188 S_ATTR_SELF_MODIFYING_CODE |
4189 S_ATTR_SOME_INSTRUCTIONS))
4190 type = eSymbolTypeData;
4191 else
4192 type = eSymbolTypeCode;
4193 } else if (symbol_section->IsDescendant(section: data_section_sp.get()) ||
4194 symbol_section->IsDescendant(
4195 section: data_dirty_section_sp.get()) ||
4196 symbol_section->IsDescendant(
4197 section: data_const_section_sp.get())) {
4198 if (symbol_sect_name &&
4199 ::strstr(haystack: symbol_sect_name, needle: "__objc") == symbol_sect_name) {
4200 type = eSymbolTypeRuntime;
4201
4202 if (symbol_name) {
4203 llvm::StringRef symbol_name_ref(symbol_name);
4204 if (symbol_name_ref.starts_with(Prefix: "_OBJC_")) {
4205 llvm::StringRef g_objc_v2_prefix_class(
4206 "_OBJC_CLASS_$_");
4207 llvm::StringRef g_objc_v2_prefix_metaclass(
4208 "_OBJC_METACLASS_$_");
4209 llvm::StringRef g_objc_v2_prefix_ivar(
4210 "_OBJC_IVAR_$_");
4211 if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_class)) {
4212 symbol_name_non_abi_mangled = symbol_name + 1;
4213 symbol_name =
4214 symbol_name + g_objc_v2_prefix_class.size();
4215 type = eSymbolTypeObjCClass;
4216 demangled_is_synthesized = true;
4217 } else if (symbol_name_ref.starts_with(
4218 Prefix: g_objc_v2_prefix_metaclass)) {
4219 symbol_name_non_abi_mangled = symbol_name + 1;
4220 symbol_name =
4221 symbol_name + g_objc_v2_prefix_metaclass.size();
4222 type = eSymbolTypeObjCMetaClass;
4223 demangled_is_synthesized = true;
4224 } else if (symbol_name_ref.starts_with(
4225 Prefix: g_objc_v2_prefix_ivar)) {
4226 symbol_name_non_abi_mangled = symbol_name + 1;
4227 symbol_name =
4228 symbol_name + g_objc_v2_prefix_ivar.size();
4229 type = eSymbolTypeObjCIVar;
4230 demangled_is_synthesized = true;
4231 }
4232 }
4233 }
4234 } else if (symbol_sect_name &&
4235 ::strstr(haystack: symbol_sect_name, needle: "__gcc_except_tab") ==
4236 symbol_sect_name) {
4237 type = eSymbolTypeException;
4238 } else {
4239 type = eSymbolTypeData;
4240 }
4241 } else if (symbol_sect_name &&
4242 ::strstr(haystack: symbol_sect_name, needle: "__IMPORT") ==
4243 symbol_sect_name) {
4244 type = eSymbolTypeTrampoline;
4245 } else if (symbol_section->IsDescendant(section: objc_section_sp.get())) {
4246 type = eSymbolTypeRuntime;
4247 if (symbol_name && symbol_name[0] == '.') {
4248 llvm::StringRef symbol_name_ref(symbol_name);
4249 llvm::StringRef g_objc_v1_prefix_class(
4250 ".objc_class_name_");
4251 if (symbol_name_ref.starts_with(Prefix: g_objc_v1_prefix_class)) {
4252 symbol_name_non_abi_mangled = symbol_name;
4253 symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4254 type = eSymbolTypeObjCClass;
4255 demangled_is_synthesized = true;
4256 }
4257 }
4258 }
4259 }
4260 }
4261 } break;
4262 }
4263 }
4264
4265 if (!add_nlist) {
4266 sym[sym_idx].Clear();
4267 return true;
4268 }
4269
4270 uint64_t symbol_value = nlist.n_value;
4271
4272 if (symbol_name_non_abi_mangled) {
4273 sym[sym_idx].GetMangled().SetMangledName(
4274 ConstString(symbol_name_non_abi_mangled));
4275 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4276 } else {
4277
4278 if (symbol_name && symbol_name[0] == '_') {
4279 symbol_name++; // Skip the leading underscore
4280 }
4281
4282 if (symbol_name) {
4283 ConstString const_symbol_name(symbol_name);
4284 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
4285 }
4286 }
4287
4288 if (is_gsym) {
4289 const char *gsym_name = sym[sym_idx]
4290 .GetMangled()
4291 .GetName(preference: Mangled::ePreferMangled)
4292 .GetCString();
4293 if (gsym_name)
4294 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4295 }
4296
4297 if (symbol_section) {
4298 const addr_t section_file_addr = symbol_section->GetFileAddress();
4299 if (symbol_byte_size == 0 && function_starts_count > 0) {
4300 addr_t symbol_lookup_file_addr = nlist.n_value;
4301 // Do an exact address match for non-ARM addresses, else get the
4302 // closest since the symbol might be a thumb symbol which has an
4303 // address with bit zero set.
4304 FunctionStarts::Entry *func_start_entry =
4305 function_starts.FindEntry(addr: symbol_lookup_file_addr, exact_match_only: !is_arm);
4306 if (is_arm && func_start_entry) {
4307 // Verify that the function start address is the symbol address
4308 // (ARM) or the symbol address + 1 (thumb).
4309 if (func_start_entry->addr != symbol_lookup_file_addr &&
4310 func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4311 // Not the right entry, NULL it out...
4312 func_start_entry = nullptr;
4313 }
4314 }
4315 if (func_start_entry) {
4316 func_start_entry->data = true;
4317
4318 addr_t symbol_file_addr = func_start_entry->addr;
4319 if (is_arm)
4320 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4321
4322 const FunctionStarts::Entry *next_func_start_entry =
4323 function_starts.FindNextEntry(entry: func_start_entry);
4324 const addr_t section_end_file_addr =
4325 section_file_addr + symbol_section->GetByteSize();
4326 if (next_func_start_entry) {
4327 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4328 // Be sure the clear the Thumb address bit when we calculate the
4329 // size from the current and next address
4330 if (is_arm)
4331 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4332 symbol_byte_size = std::min<lldb::addr_t>(
4333 a: next_symbol_file_addr - symbol_file_addr,
4334 b: section_end_file_addr - symbol_file_addr);
4335 } else {
4336 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4337 }
4338 }
4339 }
4340 symbol_value -= section_file_addr;
4341 }
4342
4343 if (!is_debug) {
4344 if (type == eSymbolTypeCode) {
4345 // See if we can find a N_FUN entry for any code symbols. If we do
4346 // find a match, and the name matches, then we can merge the two into
4347 // just the function symbol to avoid duplicate entries in the symbol
4348 // table.
4349 std::pair<ValueToSymbolIndexMap::const_iterator,
4350 ValueToSymbolIndexMap::const_iterator>
4351 range;
4352 range = N_FUN_addr_to_sym_idx.equal_range(x: nlist.n_value);
4353 if (range.first != range.second) {
4354 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4355 pos != range.second; ++pos) {
4356 if (sym[sym_idx].GetMangled().GetName(preference: Mangled::ePreferMangled) ==
4357 sym[pos->second].GetMangled().GetName(
4358 preference: Mangled::ePreferMangled)) {
4359 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4360 // We just need the flags from the linker symbol, so put these
4361 // flags into the N_FUN flags to avoid duplicate symbols in the
4362 // symbol table.
4363 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4364 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4365 if (resolver_addresses.find(x: nlist.n_value) !=
4366 resolver_addresses.end())
4367 sym[pos->second].SetType(eSymbolTypeResolver);
4368 sym[sym_idx].Clear();
4369 return true;
4370 }
4371 }
4372 } else {
4373 if (resolver_addresses.find(x: nlist.n_value) !=
4374 resolver_addresses.end())
4375 type = eSymbolTypeResolver;
4376 }
4377 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4378 type == eSymbolTypeObjCMetaClass ||
4379 type == eSymbolTypeObjCIVar) {
4380 // See if we can find a N_STSYM entry for any data symbols. If we do
4381 // find a match, and the name matches, then we can merge the two into
4382 // just the Static symbol to avoid duplicate entries in the symbol
4383 // table.
4384 std::pair<ValueToSymbolIndexMap::const_iterator,
4385 ValueToSymbolIndexMap::const_iterator>
4386 range;
4387 range = N_STSYM_addr_to_sym_idx.equal_range(x: nlist.n_value);
4388 if (range.first != range.second) {
4389 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4390 pos != range.second; ++pos) {
4391 if (sym[sym_idx].GetMangled().GetName(preference: Mangled::ePreferMangled) ==
4392 sym[pos->second].GetMangled().GetName(
4393 preference: Mangled::ePreferMangled)) {
4394 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4395 // We just need the flags from the linker symbol, so put these
4396 // flags into the N_STSYM flags to avoid duplicate symbols in
4397 // the symbol table.
4398 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4399 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4400 sym[sym_idx].Clear();
4401 return true;
4402 }
4403 }
4404 } else {
4405 // Combine N_GSYM stab entries with the non stab symbol.
4406 const char *gsym_name = sym[sym_idx]
4407 .GetMangled()
4408 .GetName(preference: Mangled::ePreferMangled)
4409 .GetCString();
4410 if (gsym_name) {
4411 ConstNameToSymbolIndexMap::const_iterator pos =
4412 N_GSYM_name_to_sym_idx.find(Val: gsym_name);
4413 if (pos != N_GSYM_name_to_sym_idx.end()) {
4414 const uint32_t GSYM_sym_idx = pos->second;
4415 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4416 // Copy the address, because often the N_GSYM address has an
4417 // invalid address of zero when the global is a common symbol.
4418 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4419 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4420 add_symbol_addr(
4421 sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4422 // We just need the flags from the linker symbol, so put these
4423 // flags into the N_GSYM flags to avoid duplicate symbols in
4424 // the symbol table.
4425 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4426 sym[sym_idx].Clear();
4427 return true;
4428 }
4429 }
4430 }
4431 }
4432 }
4433
4434 sym[sym_idx].SetID(nlist_idx);
4435 sym[sym_idx].SetType(type);
4436 if (set_value) {
4437 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4438 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4439 if (symbol_section)
4440 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4441 }
4442 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4443 if (nlist.n_desc & N_WEAK_REF)
4444 sym[sym_idx].SetIsWeak(true);
4445
4446 if (symbol_byte_size > 0)
4447 sym[sym_idx].SetByteSize(symbol_byte_size);
4448
4449 if (demangled_is_synthesized)
4450 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4451
4452 ++sym_idx;
4453 return true;
4454 };
4455
4456 // First parse all the nlists but don't process them yet. See the next
4457 // comment for an explanation why.
4458 std::vector<struct nlist_64> nlists;
4459 nlists.reserve(n: symtab_load_command.nsyms);
4460 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4461 if (auto nlist =
4462 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4463 nlists.push_back(x: *nlist);
4464 else
4465 break;
4466 }
4467
4468 // Now parse all the debug symbols. This is needed to merge non-debug
4469 // symbols in the next step. Non-debug symbols are always coalesced into
4470 // the debug symbol. Doing this in one step would mean that some symbols
4471 // won't be merged.
4472 nlist_idx = 0;
4473 for (auto &nlist : nlists) {
4474 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4475 break;
4476 }
4477
4478 // Finally parse all the non debug symbols.
4479 nlist_idx = 0;
4480 for (auto &nlist : nlists) {
4481 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4482 break;
4483 }
4484
4485 for (const auto &pos : reexport_shlib_needs_fixup) {
4486 const auto undef_pos = undefined_name_to_desc.find(Val: pos.second);
4487 if (undef_pos != undefined_name_to_desc.end()) {
4488 const uint8_t dylib_ordinal =
4489 llvm::MachO::GET_LIBRARY_ORDINAL(n_desc: undef_pos->second);
4490 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4491 sym[pos.first].SetReExportedSymbolSharedLibrary(
4492 dylib_files.GetFileSpecAtIndex(idx: dylib_ordinal - 1));
4493 }
4494 }
4495 }
4496
4497 // Count how many trie symbols we'll add to the symbol table
4498 int trie_symbol_table_augment_count = 0;
4499 for (auto &e : external_sym_trie_entries) {
4500 if (!symbols_added.contains(V: e.entry.address))
4501 trie_symbol_table_augment_count++;
4502 }
4503
4504 if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4505 num_syms = sym_idx + trie_symbol_table_augment_count;
4506 sym = symtab.Resize(count: num_syms);
4507 }
4508 uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4509
4510 // Add symbols from the trie to the symbol table.
4511 for (auto &e : external_sym_trie_entries) {
4512 if (symbols_added.contains(V: e.entry.address))
4513 continue;
4514
4515 // Find the section that this trie address is in, use that to annotate
4516 // symbol type as we add the trie address and name to the symbol table.
4517 Address symbol_addr;
4518 if (module_sp->ResolveFileAddress(vm_addr: e.entry.address, so_addr&: symbol_addr)) {
4519 SectionSP symbol_section(symbol_addr.GetSection());
4520 const char *symbol_name = e.entry.name.GetCString();
4521 bool demangled_is_synthesized = false;
4522 SymbolType type =
4523 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4524 data_section_sp, data_dirty_section_sp,
4525 data_const_section_sp, symbol_section);
4526
4527 sym[sym_idx].SetType(type);
4528 if (symbol_section) {
4529 sym[sym_idx].SetID(synthetic_sym_id++);
4530 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4531 if (demangled_is_synthesized)
4532 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4533 sym[sym_idx].SetIsSynthetic(true);
4534 sym[sym_idx].SetExternal(true);
4535 sym[sym_idx].GetAddressRef() = symbol_addr;
4536 add_symbol_addr(symbol_addr.GetFileAddress());
4537 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4538 sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB);
4539 ++sym_idx;
4540 }
4541 }
4542 }
4543
4544 if (function_starts_count > 0) {
4545 uint32_t num_synthetic_function_symbols = 0;
4546 for (i = 0; i < function_starts_count; ++i) {
4547 if (!symbols_added.contains(V: function_starts.GetEntryRef(i).addr))
4548 ++num_synthetic_function_symbols;
4549 }
4550
4551 if (num_synthetic_function_symbols > 0) {
4552 if (num_syms < sym_idx + num_synthetic_function_symbols) {
4553 num_syms = sym_idx + num_synthetic_function_symbols;
4554 sym = symtab.Resize(count: num_syms);
4555 }
4556 for (i = 0; i < function_starts_count; ++i) {
4557 const FunctionStarts::Entry *func_start_entry =
4558 function_starts.GetEntryAtIndex(i);
4559 if (!symbols_added.contains(V: func_start_entry->addr)) {
4560 addr_t symbol_file_addr = func_start_entry->addr;
4561 uint32_t symbol_flags = 0;
4562 if (func_start_entry->data)
4563 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4564 Address symbol_addr;
4565 if (module_sp->ResolveFileAddress(vm_addr: symbol_file_addr, so_addr&: symbol_addr)) {
4566 SectionSP symbol_section(symbol_addr.GetSection());
4567 uint32_t symbol_byte_size = 0;
4568 if (symbol_section) {
4569 const addr_t section_file_addr = symbol_section->GetFileAddress();
4570 const FunctionStarts::Entry *next_func_start_entry =
4571 function_starts.FindNextEntry(entry: func_start_entry);
4572 const addr_t section_end_file_addr =
4573 section_file_addr + symbol_section->GetByteSize();
4574 if (next_func_start_entry) {
4575 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4576 if (is_arm)
4577 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4578 symbol_byte_size = std::min<lldb::addr_t>(
4579 a: next_symbol_file_addr - symbol_file_addr,
4580 b: section_end_file_addr - symbol_file_addr);
4581 } else {
4582 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4583 }
4584 sym[sym_idx].SetID(synthetic_sym_id++);
4585 // Don't set the name for any synthetic symbols, the Symbol
4586 // object will generate one if needed when the name is accessed
4587 // via accessors.
4588 sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4589 sym[sym_idx].SetType(eSymbolTypeCode);
4590 sym[sym_idx].SetIsSynthetic(true);
4591 sym[sym_idx].GetAddressRef() = symbol_addr;
4592 add_symbol_addr(symbol_addr.GetFileAddress());
4593 if (symbol_flags)
4594 sym[sym_idx].SetFlags(symbol_flags);
4595 if (symbol_byte_size)
4596 sym[sym_idx].SetByteSize(symbol_byte_size);
4597 ++sym_idx;
4598 }
4599 }
4600 }
4601 }
4602 }
4603 }
4604
4605 // Trim our symbols down to just what we ended up with after removing any
4606 // symbols.
4607 if (sym_idx < num_syms) {
4608 num_syms = sym_idx;
4609 sym = symtab.Resize(count: num_syms);
4610 }
4611
4612 // Now synthesize indirect symbols
4613 if (m_dysymtab.nindirectsyms != 0) {
4614 if (indirect_symbol_index_data.GetByteSize()) {
4615 NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4616 m_nlist_idx_to_sym_idx.end();
4617
4618 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4619 ++sect_idx) {
4620 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4621 S_SYMBOL_STUBS) {
4622 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4623 if (symbol_stub_byte_size == 0)
4624 continue;
4625
4626 const uint32_t num_symbol_stubs =
4627 m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4628
4629 if (num_symbol_stubs == 0)
4630 continue;
4631
4632 const uint32_t symbol_stub_index_offset =
4633 m_mach_sections[sect_idx].reserved1;
4634 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4635 const uint32_t symbol_stub_index =
4636 symbol_stub_index_offset + stub_idx;
4637 const lldb::addr_t symbol_stub_addr =
4638 m_mach_sections[sect_idx].addr +
4639 (stub_idx * symbol_stub_byte_size);
4640 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4641 if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4642 offset: symbol_stub_offset, length: 4)) {
4643 const uint32_t stub_sym_id =
4644 indirect_symbol_index_data.GetU32(offset_ptr: &symbol_stub_offset);
4645 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4646 continue;
4647
4648 NListIndexToSymbolIndexMap::const_iterator index_pos =
4649 m_nlist_idx_to_sym_idx.find(Val: stub_sym_id);
4650 Symbol *stub_symbol = nullptr;
4651 if (index_pos != end_index_pos) {
4652 // We have a remapping from the original nlist index to a
4653 // current symbol index, so just look this up by index
4654 stub_symbol = symtab.SymbolAtIndex(idx: index_pos->second);
4655 } else {
4656 // We need to lookup a symbol using the original nlist symbol
4657 // index since this index is coming from the S_SYMBOL_STUBS
4658 stub_symbol = symtab.FindSymbolByID(uid: stub_sym_id);
4659 }
4660
4661 if (stub_symbol) {
4662 Address so_addr(symbol_stub_addr, section_list);
4663
4664 if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4665 // Change the external symbol into a trampoline that makes
4666 // sense These symbols were N_UNDF N_EXT, and are useless
4667 // to us, so we can re-use them so we don't have to make up
4668 // a synthetic symbol for no good reason.
4669 if (resolver_addresses.find(x: symbol_stub_addr) ==
4670 resolver_addresses.end())
4671 stub_symbol->SetType(eSymbolTypeTrampoline);
4672 else
4673 stub_symbol->SetType(eSymbolTypeResolver);
4674 stub_symbol->SetExternal(false);
4675 stub_symbol->GetAddressRef() = so_addr;
4676 stub_symbol->SetByteSize(symbol_stub_byte_size);
4677 } else {
4678 // Make a synthetic symbol to describe the trampoline stub
4679 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4680 if (sym_idx >= num_syms) {
4681 sym = symtab.Resize(count: ++num_syms);
4682 stub_symbol = nullptr; // this pointer no longer valid
4683 }
4684 sym[sym_idx].SetID(synthetic_sym_id++);
4685 sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4686 if (resolver_addresses.find(x: symbol_stub_addr) ==
4687 resolver_addresses.end())
4688 sym[sym_idx].SetType(eSymbolTypeTrampoline);
4689 else
4690 sym[sym_idx].SetType(eSymbolTypeResolver);
4691 sym[sym_idx].SetIsSynthetic(true);
4692 sym[sym_idx].GetAddressRef() = so_addr;
4693 add_symbol_addr(so_addr.GetFileAddress());
4694 sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4695 ++sym_idx;
4696 }
4697 } else {
4698 if (log)
4699 log->Warning(fmt: "symbol stub referencing symbol table symbol "
4700 "%u that isn't in our minimal symbol table, "
4701 "fix this!!!",
4702 stub_sym_id);
4703 }
4704 }
4705 }
4706 }
4707 }
4708 }
4709 }
4710
4711 if (!reexport_trie_entries.empty()) {
4712 for (const auto &e : reexport_trie_entries) {
4713 if (e.entry.import_name) {
4714 // Only add indirect symbols from the Trie entries if we didn't have
4715 // a N_INDR nlist entry for this already
4716 if (indirect_symbol_names.find(x: e.entry.name) ==
4717 indirect_symbol_names.end()) {
4718 // Make a synthetic symbol to describe re-exported symbol.
4719 if (sym_idx >= num_syms)
4720 sym = symtab.Resize(count: ++num_syms);
4721 sym[sym_idx].SetID(synthetic_sym_id++);
4722 sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4723 sym[sym_idx].SetType(eSymbolTypeReExported);
4724 sym[sym_idx].SetIsSynthetic(true);
4725 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4726 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4727 sym[sym_idx].SetReExportedSymbolSharedLibrary(
4728 dylib_files.GetFileSpecAtIndex(idx: e.entry.other - 1));
4729 }
4730 ++sym_idx;
4731 }
4732 }
4733 }
4734 }
4735}
4736
4737void ObjectFileMachO::Dump(Stream *s) {
4738 ModuleSP module_sp(GetModule());
4739 if (module_sp) {
4740 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4741 s->Printf(format: "%p: ", static_cast<void *>(this));
4742 s->Indent();
4743 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4744 s->PutCString(cstr: "ObjectFileMachO64");
4745 else
4746 s->PutCString(cstr: "ObjectFileMachO32");
4747
4748 *s << ", file = '" << m_file;
4749 ModuleSpecList all_specs;
4750 ModuleSpec base_spec;
4751 GetAllArchSpecs(header: m_header, data: m_data, lc_offset: MachHeaderSizeFromMagic(magic: m_header.magic),
4752 base_spec, all_specs);
4753 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4754 *s << "', triple";
4755 if (e)
4756 s->Printf(format: "[%d]", i);
4757 *s << " = ";
4758 *s << all_specs.GetModuleSpecRefAtIndex(i)
4759 .GetArchitecture()
4760 .GetTriple()
4761 .getTriple();
4762 }
4763 *s << "\n";
4764 SectionList *sections = GetSectionList();
4765 if (sections)
4766 sections->Dump(s&: s->AsRawOstream(), indent: s->GetIndentLevel(), target: nullptr, show_header: true,
4767 UINT32_MAX);
4768
4769 if (m_symtab_up)
4770 m_symtab_up->Dump(s, target: nullptr, sort_type: eSortOrderNone);
4771 }
4772}
4773
4774UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4775 const lldb_private::DataExtractor &data,
4776 lldb::offset_t lc_offset) {
4777 uint32_t i;
4778 llvm::MachO::uuid_command load_cmd;
4779
4780 lldb::offset_t offset = lc_offset;
4781 for (i = 0; i < header.ncmds; ++i) {
4782 const lldb::offset_t cmd_offset = offset;
4783 if (data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
4784 break;
4785
4786 if (load_cmd.cmd == LC_UUID) {
4787 const uint8_t *uuid_bytes = data.PeekData(offset, length: 16);
4788
4789 if (uuid_bytes) {
4790 // OpenCL on Mac OS X uses the same UUID for each of its object files.
4791 // We pretend these object files have no UUID to prevent crashing.
4792
4793 const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4794 0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4795 0xbb, 0x14, 0xf0, 0x0d};
4796
4797 if (!memcmp(s1: uuid_bytes, s2: opencl_uuid, n: 16))
4798 return UUID();
4799
4800 return UUID(uuid_bytes, 16);
4801 }
4802 return UUID();
4803 }
4804 offset = cmd_offset + load_cmd.cmdsize;
4805 }
4806 return UUID();
4807}
4808
4809static llvm::StringRef GetOSName(uint32_t cmd) {
4810 switch (cmd) {
4811 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4812 return llvm::Triple::getOSTypeName(Kind: llvm::Triple::IOS);
4813 case llvm::MachO::LC_VERSION_MIN_MACOSX:
4814 return llvm::Triple::getOSTypeName(Kind: llvm::Triple::MacOSX);
4815 case llvm::MachO::LC_VERSION_MIN_TVOS:
4816 return llvm::Triple::getOSTypeName(Kind: llvm::Triple::TvOS);
4817 case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4818 return llvm::Triple::getOSTypeName(Kind: llvm::Triple::WatchOS);
4819 default:
4820 llvm_unreachable("unexpected LC_VERSION load command");
4821 }
4822}
4823
4824namespace {
4825struct OSEnv {
4826 llvm::StringRef os_type;
4827 llvm::StringRef environment;
4828 OSEnv(uint32_t cmd) {
4829 switch (cmd) {
4830 case llvm::MachO::PLATFORM_MACOS:
4831 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::MacOSX);
4832 return;
4833 case llvm::MachO::PLATFORM_IOS:
4834 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::IOS);
4835 return;
4836 case llvm::MachO::PLATFORM_TVOS:
4837 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::TvOS);
4838 return;
4839 case llvm::MachO::PLATFORM_WATCHOS:
4840 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::WatchOS);
4841 return;
4842 case llvm::MachO::PLATFORM_BRIDGEOS:
4843 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::BridgeOS);
4844 return;
4845 case llvm::MachO::PLATFORM_DRIVERKIT:
4846 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::DriverKit);
4847 return;
4848 case llvm::MachO::PLATFORM_MACCATALYST:
4849 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::IOS);
4850 environment = llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::MacABI);
4851 return;
4852 case llvm::MachO::PLATFORM_IOSSIMULATOR:
4853 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::IOS);
4854 environment =
4855 llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::Simulator);
4856 return;
4857 case llvm::MachO::PLATFORM_TVOSSIMULATOR:
4858 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::TvOS);
4859 environment =
4860 llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::Simulator);
4861 return;
4862 case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
4863 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::WatchOS);
4864 environment =
4865 llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::Simulator);
4866 return;
4867 case llvm::MachO::PLATFORM_XROS:
4868 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::XROS);
4869 return;
4870 case llvm::MachO::PLATFORM_XROS_SIMULATOR:
4871 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::XROS);
4872 environment =
4873 llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::Simulator);
4874 return;
4875 default: {
4876 Log *log(GetLog(mask: LLDBLog::Symbols | LLDBLog::Process));
4877 LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
4878 }
4879 }
4880 }
4881};
4882
// Splits a packed 32-bit version word into its three components:
// bits 31..16 are the major version, bits 15..8 the minor version and
// bits 7..0 the patch version.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;

  MinOS(uint32_t version) {
    major_version = version >> 16;
    minor_version = (version >> 8) & 0xffu;
    patch_version = version & 0xffu;
  }
};
4889} // namespace
4890
4891void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
4892 const lldb_private::DataExtractor &data,
4893 lldb::offset_t lc_offset,
4894 ModuleSpec &base_spec,
4895 lldb_private::ModuleSpecList &all_specs) {
4896 auto &base_arch = base_spec.GetArchitecture();
4897 base_arch.SetArchitecture(arch_type: eArchTypeMachO, cpu: header.cputype, sub: header.cpusubtype);
4898 if (!base_arch.IsValid())
4899 return;
4900
4901 bool found_any = false;
4902 auto add_triple = [&](const llvm::Triple &triple) {
4903 auto spec = base_spec;
4904 spec.GetArchitecture().GetTriple() = triple;
4905 if (spec.GetArchitecture().IsValid()) {
4906 spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
4907 all_specs.Append(spec);
4908 found_any = true;
4909 }
4910 };
4911
4912 // Set OS to an unspecified unknown or a "*" so it can match any OS
4913 llvm::Triple base_triple = base_arch.GetTriple();
4914 base_triple.setOS(llvm::Triple::UnknownOS);
4915 base_triple.setOSName(llvm::StringRef());
4916
4917 if (header.filetype == MH_PRELOAD) {
4918 if (header.cputype == CPU_TYPE_ARM) {
4919 // If this is a 32-bit arm binary, and it's a standalone binary, force
4920 // the Vendor to Apple so we don't accidentally pick up the generic
4921 // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
4922 // frame pointer register; most other armv7 ABIs use a combination of
4923 // r7 and r11.
4924 base_triple.setVendor(llvm::Triple::Apple);
4925 } else {
4926 // Set vendor to an unspecified unknown or a "*" so it can match any
4927 // vendor This is required for correct behavior of EFI debugging on
4928 // x86_64
4929 base_triple.setVendor(llvm::Triple::UnknownVendor);
4930 base_triple.setVendorName(llvm::StringRef());
4931 }
4932 return add_triple(base_triple);
4933 }
4934
4935 llvm::MachO::load_command load_cmd;
4936
4937 // See if there is an LC_VERSION_MIN_* load command that can give
4938 // us the OS type.
4939 lldb::offset_t offset = lc_offset;
4940 for (uint32_t i = 0; i < header.ncmds; ++i) {
4941 const lldb::offset_t cmd_offset = offset;
4942 if (data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
4943 break;
4944
4945 llvm::MachO::version_min_command version_min;
4946 switch (load_cmd.cmd) {
4947 case llvm::MachO::LC_VERSION_MIN_MACOSX:
4948 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4949 case llvm::MachO::LC_VERSION_MIN_TVOS:
4950 case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
4951 if (load_cmd.cmdsize != sizeof(version_min))
4952 break;
4953 if (data.ExtractBytes(offset: cmd_offset, length: sizeof(version_min),
4954 dst_byte_order: data.GetByteOrder(), dst: &version_min) == 0)
4955 break;
4956 MinOS min_os(version_min.version);
4957 llvm::SmallString<32> os_name;
4958 llvm::raw_svector_ostream os(os_name);
4959 os << GetOSName(cmd: load_cmd.cmd) << min_os.major_version << '.'
4960 << min_os.minor_version << '.' << min_os.patch_version;
4961
4962 auto triple = base_triple;
4963 triple.setOSName(os.str());
4964
4965 // Disambiguate legacy simulator platforms.
4966 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
4967 (base_triple.getArch() == llvm::Triple::x86_64 ||
4968 base_triple.getArch() == llvm::Triple::x86)) {
4969 // The combination of legacy LC_VERSION_MIN load command and
4970 // x86 architecture always indicates a simulator environment.
4971 // The combination of LC_VERSION_MIN and arm architecture only
4972 // appears for native binaries. Back-deploying simulator
4973 // binaries on Apple Silicon Macs use the modern unambigous
4974 // LC_BUILD_VERSION load commands; no special handling required.
4975 triple.setEnvironment(llvm::Triple::Simulator);
4976 }
4977 add_triple(triple);
4978 break;
4979 }
4980 default:
4981 break;
4982 }
4983
4984 offset = cmd_offset + load_cmd.cmdsize;
4985 }
4986
4987 // See if there are LC_BUILD_VERSION load commands that can give
4988 // us the OS type.
4989 offset = lc_offset;
4990 for (uint32_t i = 0; i < header.ncmds; ++i) {
4991 const lldb::offset_t cmd_offset = offset;
4992 if (data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
4993 break;
4994
4995 do {
4996 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
4997 llvm::MachO::build_version_command build_version;
4998 if (load_cmd.cmdsize < sizeof(build_version)) {
4999 // Malformed load command.
5000 break;
5001 }
5002 if (data.ExtractBytes(offset: cmd_offset, length: sizeof(build_version),
5003 dst_byte_order: data.GetByteOrder(), dst: &build_version) == 0)
5004 break;
5005 MinOS min_os(build_version.minos);
5006 OSEnv os_env(build_version.platform);
5007 llvm::SmallString<16> os_name;
5008 llvm::raw_svector_ostream os(os_name);
5009 os << os_env.os_type << min_os.major_version << '.'
5010 << min_os.minor_version << '.' << min_os.patch_version;
5011 auto triple = base_triple;
5012 triple.setOSName(os.str());
5013 os_name.clear();
5014 if (!os_env.environment.empty())
5015 triple.setEnvironmentName(os_env.environment);
5016 add_triple(triple);
5017 }
5018 } while (false);
5019 offset = cmd_offset + load_cmd.cmdsize;
5020 }
5021
5022 if (!found_any) {
5023 add_triple(base_triple);
5024 }
5025}
5026
5027ArchSpec ObjectFileMachO::GetArchitecture(
5028 ModuleSP module_sp, const llvm::MachO::mach_header &header,
5029 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5030 ModuleSpecList all_specs;
5031 ModuleSpec base_spec;
5032 GetAllArchSpecs(header, data, lc_offset: MachHeaderSizeFromMagic(magic: header.magic),
5033 base_spec, all_specs);
5034
5035 // If the object file offers multiple alternative load commands,
5036 // pick the one that matches the module.
5037 if (module_sp) {
5038 const ArchSpec &module_arch = module_sp->GetArchitecture();
5039 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
5040 ArchSpec mach_arch =
5041 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture();
5042 if (module_arch.IsCompatibleMatch(rhs: mach_arch))
5043 return mach_arch;
5044 }
5045 }
5046
5047 // Return the first arch we found.
5048 if (all_specs.GetSize() == 0)
5049 return {};
5050 return all_specs.GetModuleSpecRefAtIndex(i: 0).GetArchitecture();
5051}
5052
5053UUID ObjectFileMachO::GetUUID() {
5054 ModuleSP module_sp(GetModule());
5055 if (module_sp) {
5056 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5057 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5058 return GetUUID(header: m_header, data: m_data, lc_offset: offset);
5059 }
5060 return UUID();
5061}
5062
5063uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) {
5064 ModuleSP module_sp = GetModule();
5065 if (!module_sp)
5066 return 0;
5067
5068 uint32_t count = 0;
5069 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5070 llvm::MachO::load_command load_cmd;
5071 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5072 std::vector<std::string> rpath_paths;
5073 std::vector<std::string> rpath_relative_paths;
5074 std::vector<std::string> at_exec_relative_paths;
5075 uint32_t i;
5076 for (i = 0; i < m_header.ncmds; ++i) {
5077 const uint32_t cmd_offset = offset;
5078 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
5079 break;
5080
5081 switch (load_cmd.cmd) {
5082 case LC_RPATH:
5083 case LC_LOAD_DYLIB:
5084 case LC_LOAD_WEAK_DYLIB:
5085 case LC_REEXPORT_DYLIB:
5086 case LC_LOAD_DYLINKER:
5087 case LC_LOADFVMLIB:
5088 case LC_LOAD_UPWARD_DYLIB: {
5089 uint32_t name_offset = cmd_offset + m_data.GetU32(offset_ptr: &offset);
5090 // For LC_LOAD_DYLIB there is an alternate encoding
5091 // which adds a uint32_t `flags` field for `DYLD_USE_*`
5092 // flags. This can be detected by a timestamp field with
5093 // the `DYLIB_USE_MARKER` constant value.
5094 bool is_delayed_init = false;
5095 uint32_t use_command_marker = m_data.GetU32(offset_ptr: &offset);
5096 if (use_command_marker == 0x1a741800 /* DYLIB_USE_MARKER */) {
5097 offset += 4; /* uint32_t current_version */
5098 offset += 4; /* uint32_t compat_version */
5099 uint32_t flags = m_data.GetU32(offset_ptr: &offset);
5100 // If this LC_LOAD_DYLIB is marked delay-init,
5101 // don't report it as a dependent library -- it
5102 // may be loaded in the process at some point,
5103 // but will most likely not be load at launch.
5104 if (flags & 0x08 /* DYLIB_USE_DELAYED_INIT */)
5105 is_delayed_init = true;
5106 }
5107 const char *path = m_data.PeekCStr(offset: name_offset);
5108 if (path && !is_delayed_init) {
5109 if (load_cmd.cmd == LC_RPATH)
5110 rpath_paths.push_back(x: path);
5111 else {
5112 if (path[0] == '@') {
5113 if (strncmp(s1: path, s2: "@rpath", n: strlen(s: "@rpath")) == 0)
5114 rpath_relative_paths.push_back(x: path + strlen(s: "@rpath"));
5115 else if (strncmp(s1: path, s2: "@executable_path",
5116 n: strlen(s: "@executable_path")) == 0)
5117 at_exec_relative_paths.push_back(x: path +
5118 strlen(s: "@executable_path"));
5119 } else {
5120 FileSpec file_spec(path);
5121 if (files.AppendIfUnique(file: file_spec))
5122 count++;
5123 }
5124 }
5125 }
5126 } break;
5127
5128 default:
5129 break;
5130 }
5131 offset = cmd_offset + load_cmd.cmdsize;
5132 }
5133
5134 FileSpec this_file_spec(m_file);
5135 FileSystem::Instance().Resolve(file_spec&: this_file_spec);
5136
5137 if (!rpath_paths.empty()) {
5138 // Fixup all LC_RPATH values to be absolute paths.
5139 const std::string this_directory =
5140 this_file_spec.GetDirectory().GetString();
5141 for (auto &rpath : rpath_paths) {
5142 if (llvm::StringRef(rpath).starts_with(Prefix: g_loader_path))
5143 rpath = this_directory + rpath.substr(pos: g_loader_path.size());
5144 else if (llvm::StringRef(rpath).starts_with(Prefix: g_executable_path))
5145 rpath = this_directory + rpath.substr(pos: g_executable_path.size());
5146 }
5147
5148 for (const auto &rpath_relative_path : rpath_relative_paths) {
5149 for (const auto &rpath : rpath_paths) {
5150 std::string path = rpath;
5151 path += rpath_relative_path;
5152 // It is OK to resolve this path because we must find a file on disk
5153 // for us to accept it anyway if it is rpath relative.
5154 FileSpec file_spec(path);
5155 FileSystem::Instance().Resolve(file_spec);
5156 if (FileSystem::Instance().Exists(file_spec) &&
5157 files.AppendIfUnique(file: file_spec)) {
5158 count++;
5159 break;
5160 }
5161 }
5162 }
5163 }
5164
5165 // We may have @executable_paths but no RPATHS. Figure those out here.
5166 // Only do this if this object file is the executable. We have no way to
5167 // get back to the actual executable otherwise, so we won't get the right
5168 // path.
5169 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5170 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5171 for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5172 FileSpec file_spec =
5173 exec_dir.CopyByAppendingPathComponent(component: at_exec_relative_path);
5174 if (FileSystem::Instance().Exists(file_spec) &&
5175 files.AppendIfUnique(file: file_spec))
5176 count++;
5177 }
5178 }
5179 return count;
5180}
5181
5182lldb_private::Address ObjectFileMachO::GetEntryPointAddress() {
5183 // If the object file is not an executable it can't hold the entry point.
5184 // m_entry_point_address is initialized to an invalid address, so we can just
5185 // return that. If m_entry_point_address is valid it means we've found it
5186 // already, so return the cached value.
5187
5188 if ((!IsExecutable() && !IsDynamicLoader()) ||
5189 m_entry_point_address.IsValid()) {
5190 return m_entry_point_address;
5191 }
5192
5193 // Otherwise, look for the UnixThread or Thread command. The data for the
5194 // Thread command is given in /usr/include/mach-o.h, but it is basically:
5195 //
5196 // uint32_t flavor - this is the flavor argument you would pass to
5197 // thread_get_state
5198 // uint32_t count - this is the count of longs in the thread state data
5199 // struct XXX_thread_state state - this is the structure from
5200 // <machine/thread_status.h> corresponding to the flavor.
5201 // <repeat this trio>
5202 //
5203 // So we just keep reading the various register flavors till we find the GPR
5204 // one, then read the PC out of there.
5205 // FIXME: We will need to have a "RegisterContext data provider" class at some
5206 // point that can get all the registers
5207 // out of data in this form & attach them to a given thread. That should
5208 // underlie the MacOS X User process plugin, and we'll also need it for the
5209 // MacOS X Core File process plugin. When we have that we can also use it
5210 // here.
5211 //
5212 // For now we hard-code the offsets and flavors we need:
5213 //
5214 //
5215
5216 ModuleSP module_sp(GetModule());
5217 if (module_sp) {
5218 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5219 llvm::MachO::load_command load_cmd;
5220 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5221 uint32_t i;
5222 lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
5223 bool done = false;
5224
5225 for (i = 0; i < m_header.ncmds; ++i) {
5226 const lldb::offset_t cmd_offset = offset;
5227 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
5228 break;
5229
5230 switch (load_cmd.cmd) {
5231 case LC_UNIXTHREAD:
5232 case LC_THREAD: {
5233 while (offset < cmd_offset + load_cmd.cmdsize) {
5234 uint32_t flavor = m_data.GetU32(offset_ptr: &offset);
5235 uint32_t count = m_data.GetU32(offset_ptr: &offset);
5236 if (count == 0) {
5237 // We've gotten off somehow, log and exit;
5238 return m_entry_point_address;
5239 }
5240
5241 switch (m_header.cputype) {
5242 case llvm::MachO::CPU_TYPE_ARM:
5243 if (flavor == 1 ||
5244 flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5245 // from mach/arm/thread_status.h
5246 {
5247 offset += 60; // This is the offset of pc in the GPR thread state
5248 // data structure.
5249 start_address = m_data.GetU32(offset_ptr: &offset);
5250 done = true;
5251 }
5252 break;
5253 case llvm::MachO::CPU_TYPE_ARM64:
5254 case llvm::MachO::CPU_TYPE_ARM64_32:
5255 if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5256 {
5257 offset += 256; // This is the offset of pc in the GPR thread state
5258 // data structure.
5259 start_address = m_data.GetU64(offset_ptr: &offset);
5260 done = true;
5261 }
5262 break;
5263 case llvm::MachO::CPU_TYPE_X86_64:
5264 if (flavor ==
5265 4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5266 {
5267 offset += 16 * 8; // This is the offset of rip in the GPR thread
5268 // state data structure.
5269 start_address = m_data.GetU64(offset_ptr: &offset);
5270 done = true;
5271 }
5272 break;
5273 default:
5274 return m_entry_point_address;
5275 }
5276 // Haven't found the GPR flavor yet, skip over the data for this
5277 // flavor:
5278 if (done)
5279 break;
5280 offset += count * 4;
5281 }
5282 } break;
5283 case LC_MAIN: {
5284 uint64_t entryoffset = m_data.GetU64(offset_ptr: &offset);
5285 SectionSP text_segment_sp =
5286 GetSectionList()->FindSectionByName(section_dstr: GetSegmentNameTEXT());
5287 if (text_segment_sp) {
5288 done = true;
5289 start_address = text_segment_sp->GetFileAddress() + entryoffset;
5290 }
5291 } break;
5292
5293 default:
5294 break;
5295 }
5296 if (done)
5297 break;
5298
5299 // Go to the next load command:
5300 offset = cmd_offset + load_cmd.cmdsize;
5301 }
5302
5303 if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5304 if (GetSymtab()) {
5305 Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5306 name: ConstString("_dyld_start"), symbol_type: SymbolType::eSymbolTypeCode,
5307 symbol_debug_type: Symtab::eDebugAny, symbol_visibility: Symtab::eVisibilityAny);
5308 if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5309 start_address = dyld_start_sym->GetAddress().GetFileAddress();
5310 }
5311 }
5312 }
5313
5314 if (start_address != LLDB_INVALID_ADDRESS) {
5315 // We got the start address from the load commands, so now resolve that
5316 // address in the sections of this ObjectFile:
5317 if (!m_entry_point_address.ResolveAddressUsingFileSections(
5318 addr: start_address, sections: GetSectionList())) {
5319 m_entry_point_address.Clear();
5320 }
5321 } else {
5322 // We couldn't read the UnixThread load command - maybe it wasn't there.
5323 // As a fallback look for the "start" symbol in the main executable.
5324
5325 ModuleSP module_sp(GetModule());
5326
5327 if (module_sp) {
5328 SymbolContextList contexts;
5329 SymbolContext context;
5330 module_sp->FindSymbolsWithNameAndType(name: ConstString("start"),
5331 symbol_type: eSymbolTypeCode, sc_list&: contexts);
5332 if (contexts.GetSize()) {
5333 if (contexts.GetContextAtIndex(idx: 0, sc&: context))
5334 m_entry_point_address = context.symbol->GetAddress();
5335 }
5336 }
5337 }
5338 }
5339
5340 return m_entry_point_address;
5341}
5342
5343lldb_private::Address ObjectFileMachO::GetBaseAddress() {
5344 lldb_private::Address header_addr;
5345 SectionList *section_list = GetSectionList();
5346 if (section_list) {
5347 SectionSP text_segment_sp(
5348 section_list->FindSectionByName(section_dstr: GetSegmentNameTEXT()));
5349 if (text_segment_sp) {
5350 header_addr.SetSection(text_segment_sp);
5351 header_addr.SetOffset(0);
5352 }
5353 }
5354 return header_addr;
5355}
5356
5357uint32_t ObjectFileMachO::GetNumThreadContexts() {
5358 ModuleSP module_sp(GetModule());
5359 if (module_sp) {
5360 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5361 if (!m_thread_context_offsets_valid) {
5362 m_thread_context_offsets_valid = true;
5363 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5364 FileRangeArray::Entry file_range;
5365 llvm::MachO::thread_command thread_cmd;
5366 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5367 const uint32_t cmd_offset = offset;
5368 if (m_data.GetU32(offset_ptr: &offset, dst: &thread_cmd, count: 2) == nullptr)
5369 break;
5370
5371 if (thread_cmd.cmd == LC_THREAD) {
5372 file_range.SetRangeBase(offset);
5373 file_range.SetByteSize(thread_cmd.cmdsize - 8);
5374 m_thread_context_offsets.Append(entry: file_range);
5375 }
5376 offset = cmd_offset + thread_cmd.cmdsize;
5377 }
5378 }
5379 }
5380 return m_thread_context_offsets.GetSize();
5381}
5382
5383std::vector<std::tuple<offset_t, offset_t>>
5384ObjectFileMachO::FindLC_NOTEByName(std::string name) {
5385 std::vector<std::tuple<offset_t, offset_t>> results;
5386 ModuleSP module_sp(GetModule());
5387 if (module_sp) {
5388 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5389
5390 offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5391 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5392 const uint32_t cmd_offset = offset;
5393 llvm::MachO::load_command lc = {};
5394 if (m_data.GetU32(offset_ptr: &offset, dst: &lc.cmd, count: 2) == nullptr)
5395 break;
5396 if (lc.cmd == LC_NOTE) {
5397 char data_owner[17];
5398 m_data.CopyData(offset, length: 16, dst: data_owner);
5399 data_owner[16] = '\0';
5400 offset += 16;
5401
5402 if (name == data_owner) {
5403 offset_t payload_offset = m_data.GetU64_unchecked(offset_ptr: &offset);
5404 offset_t payload_size = m_data.GetU64_unchecked(offset_ptr: &offset);
5405 results.push_back(x: {payload_offset, payload_size});
5406 }
5407 }
5408 offset = cmd_offset + lc.cmdsize;
5409 }
5410 }
5411 return results;
5412}
5413
5414std::string ObjectFileMachO::GetIdentifierString() {
5415 Log *log(
5416 GetLog(mask: LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader));
5417 ModuleSP module_sp(GetModule());
5418 if (module_sp) {
5419 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5420
5421 auto lc_notes = FindLC_NOTEByName(name: "kern ver str");
5422 for (auto lc_note : lc_notes) {
5423 offset_t payload_offset = std::get<0>(t&: lc_note);
5424 offset_t payload_size = std::get<1>(t&: lc_note);
5425 uint32_t version;
5426 if (m_data.GetU32(offset_ptr: &payload_offset, dst: &version, count: 1) != nullptr) {
5427 if (version == 1) {
5428 uint32_t strsize = payload_size - sizeof(uint32_t);
5429 std::string result(strsize, '\0');
5430 m_data.CopyData(offset: payload_offset, length: strsize, dst: result.data());
5431 LLDB_LOGF(log, "LC_NOTE 'kern ver str' found with text '%s'",
5432 result.c_str());
5433 return result;
5434 }
5435 }
5436 }
5437
5438 // Second, make a pass over the load commands looking for an obsolete
5439 // LC_IDENT load command.
5440 offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5441 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5442 const uint32_t cmd_offset = offset;
5443 llvm::MachO::ident_command ident_command;
5444 if (m_data.GetU32(offset_ptr: &offset, dst: &ident_command, count: 2) == nullptr)
5445 break;
5446 if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
5447 std::string result(ident_command.cmdsize, '\0');
5448 if (m_data.CopyData(offset, length: ident_command.cmdsize, dst: result.data()) ==
5449 ident_command.cmdsize) {
5450 LLDB_LOGF(log, "LC_IDENT found with text '%s'", result.c_str());
5451 return result;
5452 }
5453 }
5454 offset = cmd_offset + ident_command.cmdsize;
5455 }
5456 }
5457 return {};
5458}
5459
5460AddressableBits ObjectFileMachO::GetAddressableBits() {
5461 AddressableBits addressable_bits;
5462
5463 Log *log(GetLog(mask: LLDBLog::Process));
5464 ModuleSP module_sp(GetModule());
5465 if (module_sp) {
5466 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5467 auto lc_notes = FindLC_NOTEByName(name: "addrable bits");
5468 for (auto lc_note : lc_notes) {
5469 offset_t payload_offset = std::get<0>(t&: lc_note);
5470 uint32_t version;
5471 if (m_data.GetU32(offset_ptr: &payload_offset, dst: &version, count: 1) != nullptr) {
5472 if (version == 3) {
5473 uint32_t num_addr_bits = m_data.GetU32_unchecked(offset_ptr: &payload_offset);
5474 addressable_bits.SetAddressableBits(num_addr_bits);
5475 LLDB_LOGF(log,
5476 "LC_NOTE 'addrable bits' v3 found, value %d "
5477 "bits",
5478 num_addr_bits);
5479 }
5480 if (version == 4) {
5481 uint32_t lo_addr_bits = m_data.GetU32_unchecked(offset_ptr: &payload_offset);
5482 uint32_t hi_addr_bits = m_data.GetU32_unchecked(offset_ptr: &payload_offset);
5483
5484 if (lo_addr_bits == hi_addr_bits)
5485 addressable_bits.SetAddressableBits(lo_addr_bits);
5486 else
5487 addressable_bits.SetAddressableBits(lowmem_addressing_bits: lo_addr_bits, highmem_addressing_bits: hi_addr_bits);
5488 LLDB_LOGF(log, "LC_NOTE 'addrable bits' v4 found, value %d & %d bits",
5489 lo_addr_bits, hi_addr_bits);
5490 }
5491 }
5492 }
5493 }
5494 return addressable_bits;
5495}
5496
5497bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value,
5498 bool &value_is_offset,
5499 UUID &uuid,
5500 ObjectFile::BinaryType &type) {
5501 Log *log(
5502 GetLog(mask: LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader));
5503 value = LLDB_INVALID_ADDRESS;
5504 value_is_offset = false;
5505 uuid.Clear();
5506 uint32_t log2_pagesize = 0; // not currently passed up to caller
5507 uint32_t platform = 0; // not currently passed up to caller
5508 ModuleSP module_sp(GetModule());
5509 if (module_sp) {
5510 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5511
5512 auto lc_notes = FindLC_NOTEByName(name: "main bin spec");
5513 for (auto lc_note : lc_notes) {
5514 offset_t payload_offset = std::get<0>(t&: lc_note);
5515
5516 // struct main_bin_spec
5517 // {
5518 // uint32_t version; // currently 2
5519 // uint32_t type; // 0 == unspecified,
5520 // // 1 == kernel
5521 // // 2 == user process,
5522 // dyld mach-o binary addr
5523 // // 3 == standalone binary
5524 // // 4 == user process,
5525 // // dyld_all_image_infos addr
5526 // uint64_t address; // UINT64_MAX if address not specified
5527 // uint64_t slide; // slide, UINT64_MAX if unspecified
5528 // // 0 if no slide needs to be applied to
5529 // // file address
5530 // uuid_t uuid; // all zero's if uuid not specified
5531 // uint32_t log2_pagesize; // process page size in log base 2,
5532 // // e.g. 4k pages are 12.
5533 // // 0 for unspecified
5534 // uint32_t platform; // The Mach-O platform for this corefile.
5535 // // 0 for unspecified.
5536 // // The values are defined in
5537 // // <mach-o/loader.h>, PLATFORM_*.
5538 // } __attribute((packed));
5539
5540 // "main bin spec" (main binary specification) data payload is
5541 // formatted:
5542 // uint32_t version [currently 1]
5543 // uint32_t type [0 == unspecified, 1 == kernel,
5544 // 2 == user process, 3 == firmware ]
5545 // uint64_t address [ UINT64_MAX if address not specified ]
5546 // uuid_t uuid [ all zero's if uuid not specified ]
5547 // uint32_t log2_pagesize [ process page size in log base
5548 // 2, e.g. 4k pages are 12.
5549 // 0 for unspecified ]
5550 // uint32_t unused [ for alignment ]
5551
5552 uint32_t version;
5553 if (m_data.GetU32(offset_ptr: &payload_offset, dst: &version, count: 1) != nullptr &&
5554 version <= 2) {
5555 uint32_t binspec_type = 0;
5556 uuid_t raw_uuid;
5557 memset(s: raw_uuid, c: 0, n: sizeof(uuid_t));
5558
5559 if (!m_data.GetU32(offset_ptr: &payload_offset, dst: &binspec_type, count: 1))
5560 return false;
5561 if (!m_data.GetU64(offset_ptr: &payload_offset, dst: &value, count: 1))
5562 return false;
5563 uint64_t slide = LLDB_INVALID_ADDRESS;
5564 if (version > 1 && !m_data.GetU64(offset_ptr: &payload_offset, dst: &slide, count: 1))
5565 return false;
5566 if (value == LLDB_INVALID_ADDRESS && slide != LLDB_INVALID_ADDRESS) {
5567 value = slide;
5568 value_is_offset = true;
5569 }
5570
5571 if (m_data.CopyData(offset: payload_offset, length: sizeof(uuid_t), dst: raw_uuid) != 0) {
5572 uuid = UUID(raw_uuid, sizeof(uuid_t));
5573 // convert the "main bin spec" type into our
5574 // ObjectFile::BinaryType enum
5575 const char *typestr = "unrecognized type";
5576 type = eBinaryTypeInvalid;
5577 switch (binspec_type) {
5578 case 0:
5579 type = eBinaryTypeUnknown;
5580 typestr = "uknown";
5581 break;
5582 case 1:
5583 type = eBinaryTypeKernel;
5584 typestr = "xnu kernel";
5585 break;
5586 case 2:
5587 type = eBinaryTypeUser;
5588 typestr = "userland dyld";
5589 break;
5590 case 3:
5591 type = eBinaryTypeStandalone;
5592 typestr = "standalone";
5593 break;
5594 case 4:
5595 type = eBinaryTypeUserAllImageInfos;
5596 typestr = "userland dyld_all_image_infos";
5597 break;
5598 }
5599 LLDB_LOGF(log,
5600 "LC_NOTE 'main bin spec' found, version %d type %d "
5601 "(%s), value 0x%" PRIx64 " value-is-slide==%s uuid %s",
5602 version, type, typestr, value,
5603 value_is_offset ? "true" : "false",
5604 uuid.GetAsString().c_str());
5605 if (!m_data.GetU32(offset_ptr: &payload_offset, dst: &log2_pagesize, count: 1))
5606 return false;
5607 if (version > 1 && !m_data.GetU32(offset_ptr: &payload_offset, dst: &platform, count: 1))
5608 return false;
5609 return true;
5610 }
5611 }
5612 }
5613 }
5614 return false;
5615}
5616
5617bool ObjectFileMachO::GetCorefileThreadExtraInfos(
5618 std::vector<lldb::tid_t> &tids) {
5619 tids.clear();
5620 ModuleSP module_sp(GetModule());
5621 if (module_sp) {
5622 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5623
5624 Log *log(GetLog(mask: LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread));
5625 if (StructuredData::ObjectSP object_sp = GetCorefileProcessMetadata()) {
5626 StructuredData::Dictionary *dict = object_sp->GetAsDictionary();
5627 StructuredData::Array *threads;
5628 if (!dict->GetValueForKeyAsArray(key: "threads", result&: threads) || !threads) {
5629 LLDB_LOGF(log,
5630 "'process metadata' LC_NOTE does not have a 'threads' key");
5631 return false;
5632 }
5633 if (threads->GetSize() != GetNumThreadContexts()) {
5634 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, number of "
5635 "threads does not match number of LC_THREADS.");
5636 return false;
5637 }
5638 const size_t num_threads = threads->GetSize();
5639 for (size_t i = 0; i < num_threads; i++) {
5640 std::optional<StructuredData::Dictionary *> maybe_thread =
5641 threads->GetItemAtIndexAsDictionary(idx: i);
5642 if (!maybe_thread) {
5643 LLDB_LOGF(log,
5644 "Unable to read 'process metadata' LC_NOTE, threads "
5645 "array does not have a dictionary at index %zu.",
5646 i);
5647 return false;
5648 }
5649 StructuredData::Dictionary *thread = *maybe_thread;
5650 lldb::tid_t tid = LLDB_INVALID_THREAD_ID;
5651 if (thread->GetValueForKeyAsInteger<lldb::tid_t>(key: "thread_id", result&: tid))
5652 if (tid == 0)
5653 tid = LLDB_INVALID_THREAD_ID;
5654 tids.push_back(x: tid);
5655 }
5656
5657 if (log) {
5658 StreamString logmsg;
5659 logmsg.Printf(format: "LC_NOTE 'process metadata' found: ");
5660 dict->Dump(s&: logmsg, /* pretty_print */ false);
5661 LLDB_LOGF(log, "%s", logmsg.GetData());
5662 }
5663 return true;
5664 }
5665 }
5666 return false;
5667}
5668
5669StructuredData::ObjectSP ObjectFileMachO::GetCorefileProcessMetadata() {
5670 ModuleSP module_sp(GetModule());
5671 if (!module_sp)
5672 return {};
5673
5674 Log *log(GetLog(mask: LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread));
5675 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5676 auto lc_notes = FindLC_NOTEByName(name: "process metadata");
5677 if (lc_notes.size() == 0)
5678 return {};
5679
5680 if (lc_notes.size() > 1)
5681 LLDB_LOGF(
5682 log,
5683 "Multiple 'process metadata' LC_NOTEs found, only using the first.");
5684
5685 auto [payload_offset, strsize] = lc_notes[0];
5686 std::string buf(strsize, '\0');
5687 if (m_data.CopyData(offset: payload_offset, length: strsize, dst: buf.data()) != strsize) {
5688 LLDB_LOGF(log,
5689 "Unable to read %" PRIu64
5690 " bytes of 'process metadata' LC_NOTE JSON contents",
5691 strsize);
5692 return {};
5693 }
5694 while (buf.back() == '\0')
5695 buf.resize(n: buf.size() - 1);
5696 StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(json_text: buf);
5697 if (!object_sp) {
5698 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not "
5699 "parse as valid JSON.");
5700 return {};
5701 }
5702 StructuredData::Dictionary *dict = object_sp->GetAsDictionary();
5703 if (!dict) {
5704 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not "
5705 "get a dictionary.");
5706 return {};
5707 }
5708
5709 return object_sp;
5710}
5711
5712lldb::RegisterContextSP
5713ObjectFileMachO::GetThreadContextAtIndex(uint32_t idx,
5714 lldb_private::Thread &thread) {
5715 lldb::RegisterContextSP reg_ctx_sp;
5716
5717 ModuleSP module_sp(GetModule());
5718 if (module_sp) {
5719 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5720 if (!m_thread_context_offsets_valid)
5721 GetNumThreadContexts();
5722
5723 const FileRangeArray::Entry *thread_context_file_range =
5724 m_thread_context_offsets.GetEntryAtIndex(i: idx);
5725 if (thread_context_file_range) {
5726
5727 DataExtractor data(m_data, thread_context_file_range->GetRangeBase(),
5728 thread_context_file_range->GetByteSize());
5729
5730 switch (m_header.cputype) {
5731 case llvm::MachO::CPU_TYPE_ARM64:
5732 case llvm::MachO::CPU_TYPE_ARM64_32:
5733 reg_ctx_sp =
5734 std::make_shared<RegisterContextDarwin_arm64_Mach>(args&: thread, args&: data);
5735 break;
5736
5737 case llvm::MachO::CPU_TYPE_ARM:
5738 reg_ctx_sp =
5739 std::make_shared<RegisterContextDarwin_arm_Mach>(args&: thread, args&: data);
5740 break;
5741
5742 case llvm::MachO::CPU_TYPE_X86_64:
5743 reg_ctx_sp =
5744 std::make_shared<RegisterContextDarwin_x86_64_Mach>(args&: thread, args&: data);
5745 break;
5746
5747 case llvm::MachO::CPU_TYPE_RISCV:
5748 reg_ctx_sp =
5749 std::make_shared<RegisterContextDarwin_riscv32_Mach>(args&: thread, args&: data);
5750 break;
5751 }
5752 }
5753 }
5754 return reg_ctx_sp;
5755}
5756
5757ObjectFile::Type ObjectFileMachO::CalculateType() {
5758 switch (m_header.filetype) {
5759 case MH_OBJECT: // 0x1u
5760 if (GetAddressByteSize() == 4) {
5761 // 32 bit kexts are just object files, but they do have a valid
5762 // UUID load command.
5763 if (GetUUID()) {
5764 // this checking for the UUID load command is not enough we could
5765 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5766 // this is required of kexts
5767 if (m_strata == eStrataInvalid)
5768 m_strata = eStrataKernel;
5769 return eTypeSharedLibrary;
5770 }
5771 }
5772 return eTypeObjectFile;
5773
5774 case MH_EXECUTE:
5775 return eTypeExecutable; // 0x2u
5776 case MH_FVMLIB:
5777 return eTypeSharedLibrary; // 0x3u
5778 case MH_CORE:
5779 return eTypeCoreFile; // 0x4u
5780 case MH_PRELOAD:
5781 return eTypeSharedLibrary; // 0x5u
5782 case MH_DYLIB:
5783 return eTypeSharedLibrary; // 0x6u
5784 case MH_DYLINKER:
5785 return eTypeDynamicLinker; // 0x7u
5786 case MH_BUNDLE:
5787 return eTypeSharedLibrary; // 0x8u
5788 case MH_DYLIB_STUB:
5789 return eTypeStubLibrary; // 0x9u
5790 case MH_DSYM:
5791 return eTypeDebugInfo; // 0xAu
5792 case MH_KEXT_BUNDLE:
5793 return eTypeSharedLibrary; // 0xBu
5794 default:
5795 break;
5796 }
5797 return eTypeUnknown;
5798}
5799
5800ObjectFile::Strata ObjectFileMachO::CalculateStrata() {
5801 switch (m_header.filetype) {
5802 case MH_OBJECT: // 0x1u
5803 {
5804 // 32 bit kexts are just object files, but they do have a valid
5805 // UUID load command.
5806 if (GetUUID()) {
5807 // this checking for the UUID load command is not enough we could
5808 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5809 // this is required of kexts
5810 if (m_type == eTypeInvalid)
5811 m_type = eTypeSharedLibrary;
5812
5813 return eStrataKernel;
5814 }
5815 }
5816 return eStrataUnknown;
5817
5818 case MH_EXECUTE: // 0x2u
5819 // Check for the MH_DYLDLINK bit in the flags
5820 if (m_header.flags & MH_DYLDLINK) {
5821 return eStrataUser;
5822 } else {
5823 SectionList *section_list = GetSectionList();
5824 if (section_list) {
5825 static ConstString g_kld_section_name("__KLD");
5826 if (section_list->FindSectionByName(section_dstr: g_kld_section_name))
5827 return eStrataKernel;
5828 }
5829 }
5830 return eStrataRawImage;
5831
5832 case MH_FVMLIB:
5833 return eStrataUser; // 0x3u
5834 case MH_CORE:
5835 return eStrataUnknown; // 0x4u
5836 case MH_PRELOAD:
5837 return eStrataRawImage; // 0x5u
5838 case MH_DYLIB:
5839 return eStrataUser; // 0x6u
5840 case MH_DYLINKER:
5841 return eStrataUser; // 0x7u
5842 case MH_BUNDLE:
5843 return eStrataUser; // 0x8u
5844 case MH_DYLIB_STUB:
5845 return eStrataUser; // 0x9u
5846 case MH_DSYM:
5847 return eStrataUnknown; // 0xAu
5848 case MH_KEXT_BUNDLE:
5849 return eStrataKernel; // 0xBu
5850 default:
5851 break;
5852 }
5853 return eStrataUnknown;
5854}
5855
5856llvm::VersionTuple ObjectFileMachO::GetVersion() {
5857 ModuleSP module_sp(GetModule());
5858 if (module_sp) {
5859 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5860 llvm::MachO::dylib_command load_cmd;
5861 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5862 uint32_t version_cmd = 0;
5863 uint64_t version = 0;
5864 uint32_t i;
5865 for (i = 0; i < m_header.ncmds; ++i) {
5866 const lldb::offset_t cmd_offset = offset;
5867 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
5868 break;
5869
5870 if (load_cmd.cmd == LC_ID_DYLIB) {
5871 if (version_cmd == 0) {
5872 version_cmd = load_cmd.cmd;
5873 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd.dylib, count: 4) == nullptr)
5874 break;
5875 version = load_cmd.dylib.current_version;
5876 }
5877 break; // Break for now unless there is another more complete version
5878 // number load command in the future.
5879 }
5880 offset = cmd_offset + load_cmd.cmdsize;
5881 }
5882
5883 if (version_cmd == LC_ID_DYLIB) {
5884 unsigned major = (version & 0xFFFF0000ull) >> 16;
5885 unsigned minor = (version & 0x0000FF00ull) >> 8;
5886 unsigned subminor = (version & 0x000000FFull);
5887 return llvm::VersionTuple(major, minor, subminor);
5888 }
5889 }
5890 return llvm::VersionTuple();
5891}
5892
5893ArchSpec ObjectFileMachO::GetArchitecture() {
5894 ModuleSP module_sp(GetModule());
5895 ArchSpec arch;
5896 if (module_sp) {
5897 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5898
5899 return GetArchitecture(module_sp, header: m_header, data: m_data,
5900 lc_offset: MachHeaderSizeFromMagic(magic: m_header.magic));
5901 }
5902 return arch;
5903}
5904
5905void ObjectFileMachO::GetProcessSharedCacheUUID(Process *process,
5906 addr_t &base_addr, UUID &uuid) {
5907 uuid.Clear();
5908 base_addr = LLDB_INVALID_ADDRESS;
5909 if (process && process->GetDynamicLoader()) {
5910 DynamicLoader *dl = process->GetDynamicLoader();
5911 LazyBool using_shared_cache;
5912 LazyBool private_shared_cache;
5913 dl->GetSharedCacheInformation(base_address&: base_addr, uuid, using_shared_cache,
5914 private_shared_cache);
5915 }
5916 Log *log(GetLog(mask: LLDBLog::Symbols | LLDBLog::Process));
5917 LLDB_LOGF(
5918 log,
5919 "inferior process shared cache has a UUID of %s, base address 0x%" PRIx64,
5920 uuid.GetAsString().c_str(), base_addr);
5921}
5922
5923// From dyld SPI header dyld_process_info.h
5924typedef void *dyld_process_info;
5925struct lldb_copy__dyld_process_cache_info {
5926 uuid_t cacheUUID; // UUID of cache used by process
5927 uint64_t cacheBaseAddress; // load address of dyld shared cache
5928 bool noCache; // process is running without a dyld cache
5929 bool privateCache; // process is using a private copy of its dyld cache
5930};
5931
// #including mach/mach.h pulls in machine.h, whose CPU_TYPE_ARM etc.
// definitions conflict with the llvm enum definitions (for example
// llvm::MachO::CPU_TYPE_ARM) and turn them into compile errors. So we need to
// use the actual underlying types of task_t and kern_return_t below.
5936extern "C" unsigned int /*task_t*/ mach_task_self();
5937
5938void ObjectFileMachO::GetLLDBSharedCacheUUID(addr_t &base_addr, UUID &uuid) {
5939 uuid.Clear();
5940 base_addr = LLDB_INVALID_ADDRESS;
5941
5942#if defined(__APPLE__)
5943 uint8_t *(*dyld_get_all_image_infos)(void);
5944 dyld_get_all_image_infos =
5945 (uint8_t * (*)()) dlsym(RTLD_DEFAULT, "_dyld_get_all_image_infos");
5946 if (dyld_get_all_image_infos) {
5947 uint8_t *dyld_all_image_infos_address = dyld_get_all_image_infos();
5948 if (dyld_all_image_infos_address) {
5949 uint32_t *version = (uint32_t *)
5950 dyld_all_image_infos_address; // version <mach-o/dyld_images.h>
5951 if (*version >= 13) {
5952 uuid_t *sharedCacheUUID_address = 0;
5953 int wordsize = sizeof(uint8_t *);
5954 if (wordsize == 8) {
5955 sharedCacheUUID_address =
5956 (uuid_t *)((uint8_t *)dyld_all_image_infos_address +
5957 160); // sharedCacheUUID <mach-o/dyld_images.h>
5958 if (*version >= 15)
5959 base_addr =
5960 *(uint64_t
5961 *)((uint8_t *)dyld_all_image_infos_address +
5962 176); // sharedCacheBaseAddress <mach-o/dyld_images.h>
5963 } else {
5964 sharedCacheUUID_address =
5965 (uuid_t *)((uint8_t *)dyld_all_image_infos_address +
5966 84); // sharedCacheUUID <mach-o/dyld_images.h>
5967 if (*version >= 15) {
5968 base_addr = 0;
5969 base_addr =
5970 *(uint32_t
5971 *)((uint8_t *)dyld_all_image_infos_address +
5972 100); // sharedCacheBaseAddress <mach-o/dyld_images.h>
5973 }
5974 }
5975 uuid = UUID(sharedCacheUUID_address, sizeof(uuid_t));
5976 }
5977 }
5978 } else {
5979 // Exists in macOS 10.12 and later, iOS 10.0 and later - dyld SPI
5980 dyld_process_info (*dyld_process_info_create)(
5981 unsigned int /* task_t */ task, uint64_t timestamp,
5982 unsigned int /*kern_return_t*/ *kernelError);
5983 void (*dyld_process_info_get_cache)(void *info, void *cacheInfo);
5984 void (*dyld_process_info_release)(dyld_process_info info);
5985
5986 dyld_process_info_create = (void *(*)(unsigned int /* task_t */, uint64_t,
5987 unsigned int /*kern_return_t*/ *))
5988 dlsym(RTLD_DEFAULT, "_dyld_process_info_create");
5989 dyld_process_info_get_cache = (void (*)(void *, void *))dlsym(
5990 RTLD_DEFAULT, "_dyld_process_info_get_cache");
5991 dyld_process_info_release =
5992 (void (*)(void *))dlsym(RTLD_DEFAULT, "_dyld_process_info_release");
5993
5994 if (dyld_process_info_create && dyld_process_info_get_cache) {
5995 unsigned int /*kern_return_t */ kern_ret;
5996 dyld_process_info process_info =
5997 dyld_process_info_create(::mach_task_self(), 0, &kern_ret);
5998 if (process_info) {
5999 struct lldb_copy__dyld_process_cache_info sc_info;
6000 memset(&sc_info, 0, sizeof(struct lldb_copy__dyld_process_cache_info));
6001 dyld_process_info_get_cache(process_info, &sc_info);
6002 if (sc_info.cacheBaseAddress != 0) {
6003 base_addr = sc_info.cacheBaseAddress;
6004 uuid = UUID(sc_info.cacheUUID, sizeof(uuid_t));
6005 }
6006 dyld_process_info_release(process_info);
6007 }
6008 }
6009 }
6010 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
6011 if (log && uuid.IsValid())
6012 LLDB_LOGF(log,
6013 "lldb's in-memory shared cache has a UUID of %s base address of "
6014 "0x%" PRIx64,
6015 uuid.GetAsString().c_str(), base_addr);
6016#endif
6017}
6018
6019static llvm::VersionTuple FindMinimumVersionInfo(DataExtractor &data,
6020 lldb::offset_t offset,
6021 size_t ncmds) {
6022 for (size_t i = 0; i < ncmds; i++) {
6023 const lldb::offset_t load_cmd_offset = offset;
6024 llvm::MachO::load_command lc = {};
6025 if (data.GetU32(offset_ptr: &offset, dst: &lc.cmd, count: 2) == nullptr)
6026 break;
6027
6028 uint32_t version = 0;
6029 if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX ||
6030 lc.cmd == llvm::MachO::LC_VERSION_MIN_IPHONEOS ||
6031 lc.cmd == llvm::MachO::LC_VERSION_MIN_TVOS ||
6032 lc.cmd == llvm::MachO::LC_VERSION_MIN_WATCHOS) {
6033 // struct version_min_command {
6034 // uint32_t cmd; // LC_VERSION_MIN_*
6035 // uint32_t cmdsize;
6036 // uint32_t version; // X.Y.Z encoded in nibbles xxxx.yy.zz
6037 // uint32_t sdk;
6038 // };
6039 // We want to read version.
6040 version = data.GetU32(offset_ptr: &offset);
6041 } else if (lc.cmd == llvm::MachO::LC_BUILD_VERSION) {
6042 // struct build_version_command {
6043 // uint32_t cmd; // LC_BUILD_VERSION
6044 // uint32_t cmdsize;
6045 // uint32_t platform;
6046 // uint32_t minos; // X.Y.Z encoded in nibbles xxxx.yy.zz
6047 // uint32_t sdk;
6048 // uint32_t ntools;
6049 // };
6050 // We want to read minos.
6051 offset += sizeof(uint32_t); // Skip over platform
6052 version = data.GetU32(offset_ptr: &offset); // Extract minos
6053 }
6054
6055 if (version) {
6056 const uint32_t xxxx = version >> 16;
6057 const uint32_t yy = (version >> 8) & 0xffu;
6058 const uint32_t zz = version & 0xffu;
6059 if (xxxx)
6060 return llvm::VersionTuple(xxxx, yy, zz);
6061 }
6062 offset = load_cmd_offset + lc.cmdsize;
6063 }
6064 return llvm::VersionTuple();
6065}
6066
6067llvm::VersionTuple ObjectFileMachO::GetMinimumOSVersion() {
6068 if (!m_min_os_version)
6069 m_min_os_version = FindMinimumVersionInfo(
6070 data&: m_data, offset: MachHeaderSizeFromMagic(magic: m_header.magic), ncmds: m_header.ncmds);
6071 return *m_min_os_version;
6072}
6073
6074llvm::VersionTuple ObjectFileMachO::GetSDKVersion() {
6075 if (!m_sdk_versions)
6076 m_sdk_versions = FindMinimumVersionInfo(
6077 data&: m_data, offset: MachHeaderSizeFromMagic(magic: m_header.magic), ncmds: m_header.ncmds);
6078 return *m_sdk_versions;
6079}
6080
6081bool ObjectFileMachO::GetIsDynamicLinkEditor() {
6082 return m_header.filetype == llvm::MachO::MH_DYLINKER;
6083}
6084
6085bool ObjectFileMachO::CanTrustAddressRanges() {
6086 // Dsymutil guarantees that the .debug_aranges accelerator is complete and can
6087 // be trusted by LLDB.
6088 return m_header.filetype == llvm::MachO::MH_DSYM;
6089}
6090
6091bool ObjectFileMachO::AllowAssemblyEmulationUnwindPlans() {
6092 return m_allow_assembly_emulation_unwind_plans;
6093}
6094
6095Section *ObjectFileMachO::GetMachHeaderSection() {
6096 // Find the first address of the mach header which is the first non-zero file
6097 // sized section whose file offset is zero. This is the base file address of
6098 // the mach-o file which can be subtracted from the vmaddr of the other
6099 // segments found in memory and added to the load address
6100 ModuleSP module_sp = GetModule();
6101 if (!module_sp)
6102 return nullptr;
6103 SectionList *section_list = GetSectionList();
6104 if (!section_list)
6105 return nullptr;
6106
6107 // Some binaries can have a TEXT segment with a non-zero file offset.
6108 // Binaries in the shared cache are one example. Some hand-generated
6109 // binaries may not be laid out in the normal TEXT,DATA,LC_SYMTAB order
6110 // in the file, even though they're laid out correctly in vmaddr terms.
6111 SectionSP text_segment_sp =
6112 section_list->FindSectionByName(section_dstr: GetSegmentNameTEXT());
6113 if (text_segment_sp.get() && SectionIsLoadable(section: text_segment_sp.get()))
6114 return text_segment_sp.get();
6115
6116 const size_t num_sections = section_list->GetSize();
6117 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6118 Section *section = section_list->GetSectionAtIndex(idx: sect_idx).get();
6119 if (section->GetFileOffset() == 0 && SectionIsLoadable(section))
6120 return section;
6121 }
6122
6123 return nullptr;
6124}
6125
6126bool ObjectFileMachO::SectionIsLoadable(const Section *section) {
6127 if (!section)
6128 return false;
6129 if (section->IsThreadSpecific())
6130 return false;
6131 if (GetModule().get() != section->GetModule().get())
6132 return false;
6133 // firmware style binaries with llvm gcov segment do
6134 // not have that segment mapped into memory.
6135 if (section->GetName() == GetSegmentNameLLVM_COV()) {
6136 const Strata strata = GetStrata();
6137 if (strata == eStrataKernel || strata == eStrataRawImage)
6138 return false;
6139 }
6140 // Be careful with __LINKEDIT and __DWARF segments
6141 if (section->GetName() == GetSegmentNameLINKEDIT() ||
6142 section->GetName() == GetSegmentNameDWARF()) {
6143 // Only map __LINKEDIT and __DWARF if we have an in memory image and
6144 // this isn't a kernel binary like a kext or mach_kernel.
6145 const bool is_memory_image = (bool)m_process_wp.lock();
6146 const Strata strata = GetStrata();
6147 if (is_memory_image == false || strata == eStrataKernel)
6148 return false;
6149 }
6150 return true;
6151}
6152
6153lldb::addr_t ObjectFileMachO::CalculateSectionLoadAddressForMemoryImage(
6154 lldb::addr_t header_load_address, const Section *header_section,
6155 const Section *section) {
6156 ModuleSP module_sp = GetModule();
6157 if (module_sp && header_section && section &&
6158 header_load_address != LLDB_INVALID_ADDRESS) {
6159 lldb::addr_t file_addr = header_section->GetFileAddress();
6160 if (file_addr != LLDB_INVALID_ADDRESS && SectionIsLoadable(section))
6161 return section->GetFileAddress() - file_addr + header_load_address;
6162 }
6163 return LLDB_INVALID_ADDRESS;
6164}
6165
6166bool ObjectFileMachO::SetLoadAddress(Target &target, lldb::addr_t value,
6167 bool value_is_offset) {
6168 Log *log(GetLog(mask: LLDBLog::DynamicLoader));
6169 ModuleSP module_sp = GetModule();
6170 if (!module_sp)
6171 return false;
6172
6173 SectionList *section_list = GetSectionList();
6174 if (!section_list)
6175 return false;
6176
6177 size_t num_loaded_sections = 0;
6178 const size_t num_sections = section_list->GetSize();
6179
6180 // Warn if some top-level segments map to the same address. The binary may be
6181 // malformed.
6182 const bool warn_multiple = true;
6183
6184 if (log) {
6185 StreamString logmsg;
6186 logmsg << "ObjectFileMachO::SetLoadAddress ";
6187 if (GetFileSpec())
6188 logmsg << "path='" << GetFileSpec().GetPath() << "' ";
6189 if (GetUUID()) {
6190 logmsg << "uuid=" << GetUUID().GetAsString();
6191 }
6192 LLDB_LOGF(log, "%s", logmsg.GetData());
6193 }
6194 if (value_is_offset) {
6195 // "value" is an offset to apply to each top level segment
6196 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6197 // Iterate through the object file sections to find all of the
6198 // sections that size on disk (to avoid __PAGEZERO) and load them
6199 SectionSP section_sp(section_list->GetSectionAtIndex(idx: sect_idx));
6200 if (SectionIsLoadable(section: section_sp.get())) {
6201 LLDB_LOGF(log,
6202 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is "
6203 "0x%" PRIx64,
6204 section_sp->GetName().AsCString(),
6205 section_sp->GetFileAddress() + value);
6206 if (target.SetSectionLoadAddress(section: section_sp,
6207 load_addr: section_sp->GetFileAddress() + value,
6208 warn_multiple))
6209 ++num_loaded_sections;
6210 }
6211 }
6212 } else {
6213 // "value" is the new base address of the mach_header, adjust each
6214 // section accordingly
6215
6216 Section *mach_header_section = GetMachHeaderSection();
6217 if (mach_header_section) {
6218 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6219 SectionSP section_sp(section_list->GetSectionAtIndex(idx: sect_idx));
6220
6221 lldb::addr_t section_load_addr =
6222 CalculateSectionLoadAddressForMemoryImage(
6223 header_load_address: value, header_section: mach_header_section, section: section_sp.get());
6224 if (section_load_addr != LLDB_INVALID_ADDRESS) {
6225 LLDB_LOGF(log,
6226 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is "
6227 "0x%" PRIx64,
6228 section_sp->GetName().AsCString(), section_load_addr);
6229 if (target.SetSectionLoadAddress(section: section_sp, load_addr: section_load_addr,
6230 warn_multiple))
6231 ++num_loaded_sections;
6232 }
6233 }
6234 }
6235 }
6236 return num_loaded_sections > 0;
6237}
6238
// Header record of the "all image infos" LC_NOTE payload.
struct all_image_infos_header {
  // currently 1
  uint32_t version;
  // number of binary images
  uint32_t imgcount;
  // file offset in the corefile of where the array of struct entry's begin.
  uint64_t entries_fileoff;
  // size of 'struct entry'.
  uint32_t entries_size;
  uint32_t unused;
};
6247
6248struct image_entry {
6249 uint64_t filepath_offset; // offset in corefile to c-string of the file path,
6250 // UINT64_MAX if unavailable.
6251 uuid_t uuid; // uint8_t[16]. should be set to all zeroes if
6252 // uuid is unknown.
6253 uint64_t load_address; // UINT64_MAX if unknown.
6254 uint64_t seg_addrs_offset; // offset to the array of struct segment_vmaddr's.
6255 uint32_t segment_count; // The number of segments for this binary.
6256 uint32_t unused;
6257
6258 image_entry() {
6259 filepath_offset = UINT64_MAX;
6260 memset(s: &uuid, c: 0, n: sizeof(uuid_t));
6261 segment_count = 0;
6262 load_address = UINT64_MAX;
6263 seg_addrs_offset = UINT64_MAX;
6264 unused = 0;
6265 }
6266 image_entry(const image_entry &rhs) {
6267 filepath_offset = rhs.filepath_offset;
6268 memcpy(dest: &uuid, src: &rhs.uuid, n: sizeof(uuid_t));
6269 segment_count = rhs.segment_count;
6270 seg_addrs_offset = rhs.seg_addrs_offset;
6271 load_address = rhs.load_address;
6272 unused = rhs.unused;
6273 }
6274};
6275
// A (segment name, vmaddr) pair of the "all image infos" LC_NOTE payload.
//
// A default-constructed record has an all-zero segname, a vmaddr of
// UINT64_MAX and unused set to zero. Copies are plain memberwise copies.
struct segment_vmaddr {
  // 16 raw bytes; not necessarily nul-terminated when all of them are used.
  char segname[16] = {};
  uint64_t vmaddr = UINT64_MAX;
  uint64_t unused = 0;

  segment_vmaddr() = default;
  segment_vmaddr(const segment_vmaddr &rhs) = default;
};
6292
6293// Write the payload for the "all image infos" LC_NOTE into
6294// the supplied all_image_infos_payload, assuming that this
6295// will be written into the corefile starting at
6296// initial_file_offset.
6297//
6298// The placement of this payload is a little tricky. We're
6299// laying this out as
6300//
6301// 1. header (struct all_image_info_header)
6302// 2. Array of fixed-size (struct image_entry)'s, one
6303// per binary image present in the process.
6304// 3. Arrays of (struct segment_vmaddr)'s, a varying number
6305// for each binary image.
6306// 4. Variable length c-strings of binary image filepaths,
6307// one per binary.
6308//
6309// To compute where everything will be laid out in the
6310// payload, we need to iterate over the images and calculate
6311// how many segment_vmaddr structures each image will need,
6312// and how long each image's filepath c-string is. There
6313// are some multiple passes over the image list while calculating
6314// everything.
6315
6316static offset_t
6317CreateAllImageInfosPayload(const lldb::ProcessSP &process_sp,
6318 offset_t initial_file_offset,
6319 StreamString &all_image_infos_payload,
6320 lldb_private::SaveCoreOptions &options) {
6321 Target &target = process_sp->GetTarget();
6322 ModuleList modules = target.GetImages();
6323
6324 // stack-only corefiles have no reason to include binaries that
6325 // are not executing; we're trying to make the smallest corefile
6326 // we can, so leave the rest out.
6327 if (options.GetStyle() == SaveCoreStyle::eSaveCoreStackOnly)
6328 modules.Clear();
6329
6330 std::set<std::string> executing_uuids;
6331 std::vector<ThreadSP> thread_list =
6332 process_sp->CalculateCoreFileThreadList(core_options: options);
6333 for (const ThreadSP &thread_sp : thread_list) {
6334 uint32_t stack_frame_count = thread_sp->GetStackFrameCount();
6335 for (uint32_t j = 0; j < stack_frame_count; j++) {
6336 StackFrameSP stack_frame_sp = thread_sp->GetStackFrameAtIndex(idx: j);
6337 Address pc = stack_frame_sp->GetFrameCodeAddress();
6338 ModuleSP module_sp = pc.GetModule();
6339 if (module_sp) {
6340 UUID uuid = module_sp->GetUUID();
6341 if (uuid.IsValid()) {
6342 executing_uuids.insert(x: uuid.GetAsString());
6343 modules.AppendIfNeeded(new_module: module_sp);
6344 }
6345 }
6346 }
6347 }
6348 size_t modules_count = modules.GetSize();
6349
6350 struct all_image_infos_header infos;
6351 infos.version = 1;
6352 infos.imgcount = modules_count;
6353 infos.entries_size = sizeof(image_entry);
6354 infos.entries_fileoff = initial_file_offset + sizeof(all_image_infos_header);
6355 infos.unused = 0;
6356
6357 all_image_infos_payload.PutHex32(uvalue: infos.version);
6358 all_image_infos_payload.PutHex32(uvalue: infos.imgcount);
6359 all_image_infos_payload.PutHex64(uvalue: infos.entries_fileoff);
6360 all_image_infos_payload.PutHex32(uvalue: infos.entries_size);
6361 all_image_infos_payload.PutHex32(uvalue: infos.unused);
6362
6363 // First create the structures for all of the segment name+vmaddr vectors
6364 // for each module, so we will know the size of them as we add the
6365 // module entries.
6366 std::vector<std::vector<segment_vmaddr>> modules_segment_vmaddrs;
6367 for (size_t i = 0; i < modules_count; i++) {
6368 ModuleSP module = modules.GetModuleAtIndex(idx: i);
6369
6370 SectionList *sections = module->GetSectionList();
6371 size_t sections_count = sections->GetSize();
6372 std::vector<segment_vmaddr> segment_vmaddrs;
6373 for (size_t j = 0; j < sections_count; j++) {
6374 SectionSP section = sections->GetSectionAtIndex(idx: j);
6375 if (!section->GetParent().get()) {
6376 addr_t vmaddr = section->GetLoadBaseAddress(target: &target);
6377 if (vmaddr == LLDB_INVALID_ADDRESS)
6378 continue;
6379 ConstString name = section->GetName();
6380 segment_vmaddr seg_vmaddr;
6381 // This is the uncommon case where strncpy is exactly
6382 // the right one, doesn't need to be nul terminated.
6383 // The segment name in a Mach-O LC_SEGMENT/LC_SEGMENT_64 is char[16] and
6384 // is not guaranteed to be nul-terminated if all 16 characters are
6385 // used.
6386 // coverity[buffer_size_warning]
6387 strncpy(dest: seg_vmaddr.segname, src: name.AsCString(),
6388 n: sizeof(seg_vmaddr.segname));
6389 seg_vmaddr.vmaddr = vmaddr;
6390 seg_vmaddr.unused = 0;
6391 segment_vmaddrs.push_back(x: seg_vmaddr);
6392 }
6393 }
6394 modules_segment_vmaddrs.push_back(x: segment_vmaddrs);
6395 }
6396
6397 offset_t size_of_vmaddr_structs = 0;
6398 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) {
6399 size_of_vmaddr_structs +=
6400 modules_segment_vmaddrs[i].size() * sizeof(segment_vmaddr);
6401 }
6402
6403 offset_t size_of_filepath_cstrings = 0;
6404 for (size_t i = 0; i < modules_count; i++) {
6405 ModuleSP module_sp = modules.GetModuleAtIndex(idx: i);
6406 size_of_filepath_cstrings += module_sp->GetFileSpec().GetPath().size() + 1;
6407 }
6408
6409 // Calculate the file offsets of our "all image infos" payload in the
6410 // corefile. initial_file_offset the original value passed in to this method.
6411
6412 offset_t start_of_entries =
6413 initial_file_offset + sizeof(all_image_infos_header);
6414 offset_t start_of_seg_vmaddrs =
6415 start_of_entries + sizeof(image_entry) * modules_count;
6416 offset_t start_of_filenames = start_of_seg_vmaddrs + size_of_vmaddr_structs;
6417
6418 offset_t final_file_offset = start_of_filenames + size_of_filepath_cstrings;
6419
6420 // Now write the one-per-module 'struct image_entry' into the
6421 // StringStream; keep track of where the struct segment_vmaddr
6422 // entries for each module will end up in the corefile.
6423
6424 offset_t current_string_offset = start_of_filenames;
6425 offset_t current_segaddrs_offset = start_of_seg_vmaddrs;
6426 for (size_t i = 0; i < modules_count; i++) {
6427 ModuleSP module_sp = modules.GetModuleAtIndex(idx: i);
6428
6429 struct image_entry ent;
6430 memcpy(dest: &ent.uuid, src: module_sp->GetUUID().GetBytes().data(), n: sizeof(ent.uuid));
6431 if (modules_segment_vmaddrs[i].size() > 0) {
6432 ent.segment_count = modules_segment_vmaddrs[i].size();
6433 ent.seg_addrs_offset = current_segaddrs_offset;
6434 }
6435 ent.filepath_offset = current_string_offset;
6436 ObjectFile *objfile = module_sp->GetObjectFile();
6437 if (objfile) {
6438 Address base_addr(objfile->GetBaseAddress());
6439 if (base_addr.IsValid()) {
6440 ent.load_address = base_addr.GetLoadAddress(target: &target);
6441 }
6442 }
6443
6444 all_image_infos_payload.PutHex64(uvalue: ent.filepath_offset);
6445 all_image_infos_payload.PutRawBytes(s: ent.uuid, src_len: sizeof(ent.uuid));
6446 all_image_infos_payload.PutHex64(uvalue: ent.load_address);
6447 all_image_infos_payload.PutHex64(uvalue: ent.seg_addrs_offset);
6448 all_image_infos_payload.PutHex32(uvalue: ent.segment_count);
6449
6450 if (executing_uuids.find(x: module_sp->GetUUID().GetAsString()) !=
6451 executing_uuids.end())
6452 all_image_infos_payload.PutHex32(uvalue: 1);
6453 else
6454 all_image_infos_payload.PutHex32(uvalue: 0);
6455
6456 current_segaddrs_offset += ent.segment_count * sizeof(segment_vmaddr);
6457 current_string_offset += module_sp->GetFileSpec().GetPath().size() + 1;
6458 }
6459
6460 // Now write the struct segment_vmaddr entries into the StringStream.
6461
6462 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) {
6463 if (modules_segment_vmaddrs[i].size() == 0)
6464 continue;
6465 for (struct segment_vmaddr segvm : modules_segment_vmaddrs[i]) {
6466 all_image_infos_payload.PutRawBytes(s: segvm.segname, src_len: sizeof(segvm.segname));
6467 all_image_infos_payload.PutHex64(uvalue: segvm.vmaddr);
6468 all_image_infos_payload.PutHex64(uvalue: segvm.unused);
6469 }
6470 }
6471
6472 for (size_t i = 0; i < modules_count; i++) {
6473 ModuleSP module_sp = modules.GetModuleAtIndex(idx: i);
6474 std::string filepath = module_sp->GetFileSpec().GetPath();
6475 all_image_infos_payload.PutRawBytes(s: filepath.data(), src_len: filepath.size() + 1);
6476 }
6477
6478 return final_file_offset;
6479}
6480
6481// Temp struct used to combine contiguous memory regions with
6482// identical permissions.
6483struct page_object {
6484 addr_t addr;
6485 addr_t size;
6486 uint32_t prot;
6487};
6488
6489bool ObjectFileMachO::SaveCore(const lldb::ProcessSP &process_sp,
6490 lldb_private::SaveCoreOptions &options,
6491 Status &error) {
6492 // The FileSpec and Process are already checked in PluginManager::SaveCore.
6493 assert(options.GetOutputFile().has_value());
6494 assert(process_sp);
6495 const FileSpec outfile = options.GetOutputFile().value();
6496
6497 // MachO defaults to dirty pages
6498 if (options.GetStyle() == SaveCoreStyle::eSaveCoreUnspecified)
6499 options.SetStyle(eSaveCoreDirtyOnly);
6500
6501 Target &target = process_sp->GetTarget();
6502 const ArchSpec target_arch = target.GetArchitecture();
6503 const llvm::Triple &target_triple = target_arch.GetTriple();
6504 if (target_triple.getVendor() == llvm::Triple::Apple &&
6505 (target_triple.getOS() == llvm::Triple::MacOSX ||
6506 target_triple.getOS() == llvm::Triple::IOS ||
6507 target_triple.getOS() == llvm::Triple::WatchOS ||
6508 target_triple.getOS() == llvm::Triple::TvOS ||
6509 target_triple.getOS() == llvm::Triple::BridgeOS ||
6510 target_triple.getOS() == llvm::Triple::XROS)) {
6511 bool make_core = false;
6512 switch (target_arch.GetMachine()) {
6513 case llvm::Triple::aarch64:
6514 case llvm::Triple::aarch64_32:
6515 case llvm::Triple::arm:
6516 case llvm::Triple::thumb:
6517 case llvm::Triple::x86:
6518 case llvm::Triple::x86_64:
6519 make_core = true;
6520 break;
6521 default:
6522 error = Status::FromErrorStringWithFormat(
6523 format: "unsupported core architecture: %s", target_triple.str().c_str());
6524 break;
6525 }
6526
6527 if (make_core) {
6528 CoreFileMemoryRanges core_ranges;
6529 error = process_sp->CalculateCoreFileSaveRanges(core_options: options, ranges&: core_ranges);
6530 if (error.Success()) {
6531 const uint32_t addr_byte_size = target_arch.GetAddressByteSize();
6532 const ByteOrder byte_order = target_arch.GetByteOrder();
6533 std::vector<llvm::MachO::segment_command_64> segment_load_commands;
6534 for (const auto &core_range_info : core_ranges) {
6535 // TODO: Refactor RangeDataVector to have a data iterator.
6536 const auto &core_range = core_range_info.data;
6537 uint32_t cmd_type = LC_SEGMENT_64;
6538 uint32_t segment_size = sizeof(llvm::MachO::segment_command_64);
6539 if (addr_byte_size == 4) {
6540 cmd_type = LC_SEGMENT;
6541 segment_size = sizeof(llvm::MachO::segment_command);
6542 }
6543 // Skip any ranges with no read/write/execute permissions and empty
6544 // ranges.
6545 if (core_range.lldb_permissions == 0 || core_range.range.size() == 0)
6546 continue;
6547 uint32_t vm_prot = 0;
6548 if (core_range.lldb_permissions & ePermissionsReadable)
6549 vm_prot |= VM_PROT_READ;
6550 if (core_range.lldb_permissions & ePermissionsWritable)
6551 vm_prot |= VM_PROT_WRITE;
6552 if (core_range.lldb_permissions & ePermissionsExecutable)
6553 vm_prot |= VM_PROT_EXECUTE;
6554 const addr_t vm_addr = core_range.range.start();
6555 const addr_t vm_size = core_range.range.size();
6556 llvm::MachO::segment_command_64 segment = {
6557 .cmd: cmd_type, // uint32_t cmd;
6558 .cmdsize: segment_size, // uint32_t cmdsize;
6559 .segname: {0}, // char segname[16];
6560 .vmaddr: vm_addr, // uint64_t vmaddr; // uint32_t for 32-bit Mach-O
6561 .vmsize: vm_size, // uint64_t vmsize; // uint32_t for 32-bit Mach-O
6562 .fileoff: 0, // uint64_t fileoff; // uint32_t for 32-bit Mach-O
6563 .filesize: vm_size, // uint64_t filesize; // uint32_t for 32-bit Mach-O
6564 .maxprot: vm_prot, // uint32_t maxprot;
6565 .initprot: vm_prot, // uint32_t initprot;
6566 .nsects: 0, // uint32_t nsects;
6567 .flags: 0}; // uint32_t flags;
6568 segment_load_commands.push_back(x: segment);
6569 }
6570
6571 StreamString buffer(Stream::eBinary, addr_byte_size, byte_order);
6572
6573 llvm::MachO::mach_header_64 mach_header;
6574 mach_header.magic = addr_byte_size == 8 ? MH_MAGIC_64 : MH_MAGIC;
6575 mach_header.cputype = target_arch.GetMachOCPUType();
6576 mach_header.cpusubtype = target_arch.GetMachOCPUSubType();
6577 mach_header.filetype = MH_CORE;
6578 mach_header.ncmds = segment_load_commands.size();
6579 mach_header.flags = 0;
6580 mach_header.reserved = 0;
6581 ThreadList &thread_list = process_sp->GetThreadList();
6582 const uint32_t num_threads = thread_list.GetSize();
6583
6584 // Make an array of LC_THREAD data items. Each one contains the
6585 // contents of the LC_THREAD load command. The data doesn't contain
6586 // the load command + load command size, we will add the load command
6587 // and load command size as we emit the data.
6588 std::vector<StreamString> LC_THREAD_datas(num_threads);
6589 for (auto &LC_THREAD_data : LC_THREAD_datas) {
6590 LC_THREAD_data.GetFlags().Set(Stream::eBinary);
6591 LC_THREAD_data.SetAddressByteSize(addr_byte_size);
6592 LC_THREAD_data.SetByteOrder(byte_order);
6593 }
6594 for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
6595 ThreadSP thread_sp(thread_list.GetThreadAtIndex(idx: thread_idx));
6596 if (thread_sp) {
6597 switch (mach_header.cputype) {
6598 case llvm::MachO::CPU_TYPE_ARM64:
6599 case llvm::MachO::CPU_TYPE_ARM64_32:
6600 RegisterContextDarwin_arm64_Mach::Create_LC_THREAD(
6601 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6602 break;
6603
6604 case llvm::MachO::CPU_TYPE_ARM:
6605 RegisterContextDarwin_arm_Mach::Create_LC_THREAD(
6606 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6607 break;
6608
6609 case llvm::MachO::CPU_TYPE_X86_64:
6610 RegisterContextDarwin_x86_64_Mach::Create_LC_THREAD(
6611 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6612 break;
6613
6614 case llvm::MachO::CPU_TYPE_RISCV:
6615 RegisterContextDarwin_riscv32_Mach::Create_LC_THREAD(
6616 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6617 break;
6618 }
6619 }
6620 }
6621
6622 // The size of the load command is the size of the segments...
6623 if (addr_byte_size == 8) {
6624 mach_header.sizeofcmds = segment_load_commands.size() *
6625 sizeof(llvm::MachO::segment_command_64);
6626 } else {
6627 mach_header.sizeofcmds = segment_load_commands.size() *
6628 sizeof(llvm::MachO::segment_command);
6629 }
6630
6631 // and the size of all LC_THREAD load command
6632 for (const auto &LC_THREAD_data : LC_THREAD_datas) {
6633 ++mach_header.ncmds;
6634 mach_header.sizeofcmds += 8 + LC_THREAD_data.GetSize();
6635 }
6636
6637 // Bits will be set to indicate which bits are NOT used in
6638 // addressing in this process or 0 for unknown.
6639 uint64_t address_mask = process_sp->GetCodeAddressMask();
6640 if (address_mask != LLDB_INVALID_ADDRESS_MASK) {
6641 // LC_NOTE "addrable bits"
6642 mach_header.ncmds++;
6643 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6644 }
6645
6646 // LC_NOTE "process metadata"
6647 mach_header.ncmds++;
6648 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6649
6650 // LC_NOTE "all image infos"
6651 mach_header.ncmds++;
6652 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6653
6654 // Write the mach header
6655 buffer.PutHex32(uvalue: mach_header.magic);
6656 buffer.PutHex32(uvalue: mach_header.cputype);
6657 buffer.PutHex32(uvalue: mach_header.cpusubtype);
6658 buffer.PutHex32(uvalue: mach_header.filetype);
6659 buffer.PutHex32(uvalue: mach_header.ncmds);
6660 buffer.PutHex32(uvalue: mach_header.sizeofcmds);
6661 buffer.PutHex32(uvalue: mach_header.flags);
6662 if (addr_byte_size == 8) {
6663 buffer.PutHex32(uvalue: mach_header.reserved);
6664 }
6665
6666 // Skip the mach header and all load commands and align to the next
6667 // 0x1000 byte boundary
6668 addr_t file_offset = buffer.GetSize() + mach_header.sizeofcmds;
6669
6670 file_offset = llvm::alignTo(Value: file_offset, Align: 16);
6671 std::vector<std::unique_ptr<LCNoteEntry>> lc_notes;
6672
6673 // Add "addrable bits" LC_NOTE when an address mask is available
6674 if (address_mask != LLDB_INVALID_ADDRESS_MASK) {
6675 std::unique_ptr<LCNoteEntry> addrable_bits_lcnote_up(
6676 new LCNoteEntry(addr_byte_size, byte_order));
6677 addrable_bits_lcnote_up->name = "addrable bits";
6678 addrable_bits_lcnote_up->payload_file_offset = file_offset;
6679 int bits = std::bitset<64>(~address_mask).count();
6680 addrable_bits_lcnote_up->payload.PutHex32(uvalue: 4); // version
6681 addrable_bits_lcnote_up->payload.PutHex32(
6682 uvalue: bits); // # of bits used for low addresses
6683 addrable_bits_lcnote_up->payload.PutHex32(
6684 uvalue: bits); // # of bits used for high addresses
6685 addrable_bits_lcnote_up->payload.PutHex32(uvalue: 0); // reserved
6686
6687 file_offset += addrable_bits_lcnote_up->payload.GetSize();
6688
6689 lc_notes.push_back(x: std::move(addrable_bits_lcnote_up));
6690 }
6691
6692 // Add "process metadata" LC_NOTE
6693 std::unique_ptr<LCNoteEntry> thread_extrainfo_lcnote_up(
6694 new LCNoteEntry(addr_byte_size, byte_order));
6695 thread_extrainfo_lcnote_up->name = "process metadata";
6696 thread_extrainfo_lcnote_up->payload_file_offset = file_offset;
6697
6698 StructuredData::DictionarySP dict(
6699 std::make_shared<StructuredData::Dictionary>());
6700 StructuredData::ArraySP threads(
6701 std::make_shared<StructuredData::Array>());
6702 for (const ThreadSP &thread_sp :
6703 process_sp->CalculateCoreFileThreadList(core_options: options)) {
6704 StructuredData::DictionarySP thread(
6705 std::make_shared<StructuredData::Dictionary>());
6706 thread->AddIntegerItem(key: "thread_id", value: thread_sp->GetID());
6707 threads->AddItem(item: thread);
6708 }
6709 dict->AddItem(key: "threads", value_sp: threads);
6710 StreamString strm;
6711 dict->Dump(s&: strm, /* pretty */ pretty_print: false);
6712 thread_extrainfo_lcnote_up->payload.PutRawBytes(s: strm.GetData(),
6713 src_len: strm.GetSize());
6714
6715 file_offset += thread_extrainfo_lcnote_up->payload.GetSize();
6716 file_offset = llvm::alignTo(Value: file_offset, Align: 16);
6717 lc_notes.push_back(x: std::move(thread_extrainfo_lcnote_up));
6718
6719 // Add "all image infos" LC_NOTE
6720 std::unique_ptr<LCNoteEntry> all_image_infos_lcnote_up(
6721 new LCNoteEntry(addr_byte_size, byte_order));
6722 all_image_infos_lcnote_up->name = "all image infos";
6723 all_image_infos_lcnote_up->payload_file_offset = file_offset;
6724 file_offset = CreateAllImageInfosPayload(
6725 process_sp, initial_file_offset: file_offset, all_image_infos_payload&: all_image_infos_lcnote_up->payload,
6726 options);
6727 lc_notes.push_back(x: std::move(all_image_infos_lcnote_up));
6728
6729 // Add LC_NOTE load commands
6730 for (auto &lcnote : lc_notes) {
6731 // Add the LC_NOTE load command to the file.
6732 buffer.PutHex32(uvalue: LC_NOTE);
6733 buffer.PutHex32(uvalue: sizeof(llvm::MachO::note_command));
6734 char namebuf[16];
6735 memset(s: namebuf, c: 0, n: sizeof(namebuf));
6736 // This is the uncommon case where strncpy is exactly
6737 // the right one, doesn't need to be nul terminated.
6738 // LC_NOTE name field is char[16] and is not guaranteed to be
6739 // nul-terminated.
6740 // coverity[buffer_size_warning]
6741 strncpy(dest: namebuf, src: lcnote->name.c_str(), n: sizeof(namebuf));
6742 buffer.PutRawBytes(s: namebuf, src_len: sizeof(namebuf));
6743 buffer.PutHex64(uvalue: lcnote->payload_file_offset);
6744 buffer.PutHex64(uvalue: lcnote->payload.GetSize());
6745 }
6746
6747 // Align to 4096-byte page boundary for the LC_SEGMENTs.
6748 file_offset = llvm::alignTo(Value: file_offset, Align: 4096);
6749
6750 for (auto &segment : segment_load_commands) {
6751 segment.fileoff = file_offset;
6752 file_offset += segment.filesize;
6753 }
6754
6755 // Write out all of the LC_THREAD load commands
6756 for (const auto &LC_THREAD_data : LC_THREAD_datas) {
6757 const size_t LC_THREAD_data_size = LC_THREAD_data.GetSize();
6758 buffer.PutHex32(uvalue: LC_THREAD);
6759 buffer.PutHex32(uvalue: 8 + LC_THREAD_data_size); // cmd + cmdsize + data
6760 buffer.Write(src: LC_THREAD_data.GetString().data(), src_len: LC_THREAD_data_size);
6761 }
6762
6763 // Write out all of the segment load commands
6764 for (const auto &segment : segment_load_commands) {
6765 buffer.PutHex32(uvalue: segment.cmd);
6766 buffer.PutHex32(uvalue: segment.cmdsize);
6767 buffer.PutRawBytes(s: segment.segname, src_len: sizeof(segment.segname));
6768 if (addr_byte_size == 8) {
6769 buffer.PutHex64(uvalue: segment.vmaddr);
6770 buffer.PutHex64(uvalue: segment.vmsize);
6771 buffer.PutHex64(uvalue: segment.fileoff);
6772 buffer.PutHex64(uvalue: segment.filesize);
6773 } else {
6774 buffer.PutHex32(uvalue: static_cast<uint32_t>(segment.vmaddr));
6775 buffer.PutHex32(uvalue: static_cast<uint32_t>(segment.vmsize));
6776 buffer.PutHex32(uvalue: static_cast<uint32_t>(segment.fileoff));
6777 buffer.PutHex32(uvalue: static_cast<uint32_t>(segment.filesize));
6778 }
6779 buffer.PutHex32(uvalue: segment.maxprot);
6780 buffer.PutHex32(uvalue: segment.initprot);
6781 buffer.PutHex32(uvalue: segment.nsects);
6782 buffer.PutHex32(uvalue: segment.flags);
6783 }
6784
6785 std::string core_file_path(outfile.GetPath());
6786 auto core_file = FileSystem::Instance().Open(
6787 file_spec: outfile, options: File::eOpenOptionWriteOnly | File::eOpenOptionTruncate |
6788 File::eOpenOptionCanCreate);
6789 if (!core_file) {
6790 error = Status::FromError(error: core_file.takeError());
6791 } else {
6792 // Read 1 page at a time
6793 uint8_t bytes[0x1000];
6794 // Write the mach header and load commands out to the core file
6795 size_t bytes_written = buffer.GetString().size();
6796 error =
6797 core_file.get()->Write(buf: buffer.GetString().data(), num_bytes&: bytes_written);
6798 if (error.Success()) {
6799
6800 for (auto &lcnote : lc_notes) {
6801 if (core_file.get()->SeekFromStart(offset: lcnote->payload_file_offset) ==
6802 -1) {
6803 error = Status::FromErrorStringWithFormat(
6804 format: "Unable to seek to corefile pos "
6805 "to write '%s' LC_NOTE payload",
6806 lcnote->name.c_str());
6807 return false;
6808 }
6809 bytes_written = lcnote->payload.GetSize();
6810 error = core_file.get()->Write(buf: lcnote->payload.GetData(),
6811 num_bytes&: bytes_written);
6812 if (!error.Success())
6813 return false;
6814 }
6815
6816 // Now write the file data for all memory segments in the process
6817 for (const auto &segment : segment_load_commands) {
6818 if (core_file.get()->SeekFromStart(offset: segment.fileoff) == -1) {
6819 error = Status::FromErrorStringWithFormat(
6820 format: "unable to seek to offset 0x%" PRIx64 " in '%s'",
6821 segment.fileoff, core_file_path.c_str());
6822 break;
6823 }
6824
6825 target.GetDebugger().GetAsyncOutputStream()->Printf(
6826 format: "Saving %" PRId64
6827 " bytes of data for memory region at 0x%" PRIx64 "\n",
6828 segment.vmsize, segment.vmaddr);
6829 addr_t bytes_left = segment.vmsize;
6830 addr_t addr = segment.vmaddr;
6831 Status memory_read_error;
6832 while (bytes_left > 0 && error.Success()) {
6833 const size_t bytes_to_read =
6834 bytes_left > sizeof(bytes) ? sizeof(bytes) : bytes_left;
6835
6836 // In a savecore setting, we don't really care about caching,
6837 // as the data is dumped and very likely never read again,
6838 // so we call ReadMemoryFromInferior to bypass it.
6839 const size_t bytes_read = process_sp->ReadMemoryFromInferior(
6840 vm_addr: addr, buf: bytes, size: bytes_to_read, error&: memory_read_error);
6841
6842 if (bytes_read == bytes_to_read) {
6843 size_t bytes_written = bytes_read;
6844 error = core_file.get()->Write(buf: bytes, num_bytes&: bytes_written);
6845 bytes_left -= bytes_read;
6846 addr += bytes_read;
6847 } else {
6848 // Some pages within regions are not readable, those should
6849 // be zero filled
6850 memset(s: bytes, c: 0, n: bytes_to_read);
6851 size_t bytes_written = bytes_to_read;
6852 error = core_file.get()->Write(buf: bytes, num_bytes&: bytes_written);
6853 bytes_left -= bytes_to_read;
6854 addr += bytes_to_read;
6855 }
6856 }
6857 }
6858 }
6859 }
6860 }
6861 }
6862 return true; // This is the right plug to handle saving core files for
6863 // this process
6864 }
6865 return false;
6866}
6867
6868ObjectFileMachO::MachOCorefileAllImageInfos
6869ObjectFileMachO::GetCorefileAllImageInfos() {
6870 MachOCorefileAllImageInfos image_infos;
6871 Log *log(GetLog(mask: LLDBLog::Object | LLDBLog::Symbols | LLDBLog::Process |
6872 LLDBLog::DynamicLoader));
6873
6874 auto lc_notes = FindLC_NOTEByName(name: "all image infos");
6875 for (auto lc_note : lc_notes) {
6876 offset_t payload_offset = std::get<0>(t&: lc_note);
6877 // Read the struct all_image_infos_header.
6878 uint32_t version = m_data.GetU32(offset_ptr: &payload_offset);
6879 if (version != 1) {
6880 return image_infos;
6881 }
6882 uint32_t imgcount = m_data.GetU32(offset_ptr: &payload_offset);
6883 uint64_t entries_fileoff = m_data.GetU64(offset_ptr: &payload_offset);
6884 // 'entries_size' is not used, nor is the 'unused' entry.
6885 // offset += 4; // uint32_t entries_size;
6886 // offset += 4; // uint32_t unused;
6887
6888 LLDB_LOGF(log, "LC_NOTE 'all image infos' found version %d with %d images",
6889 version, imgcount);
6890 payload_offset = entries_fileoff;
6891 for (uint32_t i = 0; i < imgcount; i++) {
6892 // Read the struct image_entry.
6893 offset_t filepath_offset = m_data.GetU64(offset_ptr: &payload_offset);
6894 uuid_t uuid;
6895 memcpy(dest: &uuid, src: m_data.GetData(offset_ptr: &payload_offset, length: sizeof(uuid_t)),
6896 n: sizeof(uuid_t));
6897 uint64_t load_address = m_data.GetU64(offset_ptr: &payload_offset);
6898 offset_t seg_addrs_offset = m_data.GetU64(offset_ptr: &payload_offset);
6899 uint32_t segment_count = m_data.GetU32(offset_ptr: &payload_offset);
6900 uint32_t currently_executing = m_data.GetU32(offset_ptr: &payload_offset);
6901
6902 MachOCorefileImageEntry image_entry;
6903 image_entry.filename = (const char *)m_data.GetCStr(offset_ptr: &filepath_offset);
6904 image_entry.uuid = UUID(uuid, sizeof(uuid_t));
6905 image_entry.load_address = load_address;
6906 image_entry.currently_executing = currently_executing;
6907
6908 offset_t seg_vmaddrs_offset = seg_addrs_offset;
6909 for (uint32_t j = 0; j < segment_count; j++) {
6910 char segname[17];
6911 m_data.CopyData(offset: seg_vmaddrs_offset, length: 16, dst: segname);
6912 segname[16] = '\0';
6913 seg_vmaddrs_offset += 16;
6914 uint64_t vmaddr = m_data.GetU64(offset_ptr: &seg_vmaddrs_offset);
6915 seg_vmaddrs_offset += 8; /* unused */
6916
6917 std::tuple<ConstString, addr_t> new_seg{ConstString(segname), vmaddr};
6918 image_entry.segment_load_addresses.push_back(x: new_seg);
6919 }
6920 LLDB_LOGF(log, " image entry: %s %s 0x%" PRIx64 " %s",
6921 image_entry.filename.c_str(),
6922 image_entry.uuid.GetAsString().c_str(),
6923 image_entry.load_address,
6924 image_entry.currently_executing ? "currently executing"
6925 : "not currently executing");
6926 image_infos.all_image_infos.push_back(x: image_entry);
6927 }
6928 }
6929
6930 lc_notes = FindLC_NOTEByName(name: "load binary");
6931 for (auto lc_note : lc_notes) {
6932 offset_t payload_offset = std::get<0>(t&: lc_note);
6933 uint32_t version = m_data.GetU32(offset_ptr: &payload_offset);
6934 if (version == 1) {
6935 uuid_t uuid;
6936 memcpy(dest: &uuid, src: m_data.GetData(offset_ptr: &payload_offset, length: sizeof(uuid_t)),
6937 n: sizeof(uuid_t));
6938 uint64_t load_address = m_data.GetU64(offset_ptr: &payload_offset);
6939 uint64_t slide = m_data.GetU64(offset_ptr: &payload_offset);
6940 std::string filename = m_data.GetCStr(offset_ptr: &payload_offset);
6941
6942 MachOCorefileImageEntry image_entry;
6943 image_entry.filename = filename;
6944 image_entry.uuid = UUID(uuid, sizeof(uuid_t));
6945 image_entry.load_address = load_address;
6946 image_entry.slide = slide;
6947 image_entry.currently_executing = true;
6948 image_infos.all_image_infos.push_back(x: image_entry);
6949 LLDB_LOGF(log,
6950 "LC_NOTE 'load binary' found, filename %s uuid %s load "
6951 "address 0x%" PRIx64 " slide 0x%" PRIx64,
6952 filename.c_str(),
6953 image_entry.uuid.IsValid()
6954 ? image_entry.uuid.GetAsString().c_str()
6955 : "00000000-0000-0000-0000-000000000000",
6956 load_address, slide);
6957 }
6958 }
6959
6960 return image_infos;
6961}
6962
6963bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) {
6964 MachOCorefileAllImageInfos image_infos = GetCorefileAllImageInfos();
6965 Log *log = GetLog(mask: LLDBLog::Object | LLDBLog::DynamicLoader);
6966 Status error;
6967
6968 bool found_platform_binary = false;
6969 ModuleList added_modules;
6970 for (MachOCorefileImageEntry &image : image_infos.all_image_infos) {
6971 ModuleSP module_sp, local_filesystem_module_sp;
6972
6973 // If this is a platform binary, it has been loaded (or registered with
6974 // the DynamicLoader to be loaded), we don't need to do any further
6975 // processing. We're not going to call ModulesDidLoad on this in this
6976 // method, so notify==true.
6977 if (process.GetTarget()
6978 .GetDebugger()
6979 .GetPlatformList()
6980 .LoadPlatformBinaryAndSetup(process: &process, addr: image.load_address,
6981 notify: true /* notify */)) {
6982 LLDB_LOGF(log,
6983 "ObjectFileMachO::%s binary at 0x%" PRIx64
6984 " is a platform binary, has been handled by a Platform plugin.",
6985 __FUNCTION__, image.load_address);
6986 continue;
6987 }
6988
6989 bool value_is_offset = image.load_address == LLDB_INVALID_ADDRESS;
6990 uint64_t value = value_is_offset ? image.slide : image.load_address;
6991 if (value_is_offset && value == LLDB_INVALID_ADDRESS) {
6992 // We have neither address nor slide; so we will find the binary
6993 // by UUID and load it at slide/offset 0.
6994 value = 0;
6995 }
6996
6997 // We have either a UUID, or we have a load address which
6998 // and can try to read load commands and find a UUID.
6999 if (image.uuid.IsValid() ||
7000 (!value_is_offset && value != LLDB_INVALID_ADDRESS)) {
7001 const bool set_load_address = image.segment_load_addresses.size() == 0;
7002 const bool notify = false;
7003 // Userland Darwin binaries will have segment load addresses via
7004 // the `all image infos` LC_NOTE.
7005 const bool allow_memory_image_last_resort =
7006 image.segment_load_addresses.size();
7007 module_sp = DynamicLoader::LoadBinaryWithUUIDAndAddress(
7008 process: &process, name: image.filename, uuid: image.uuid, value, value_is_offset,
7009 force_symbol_search: image.currently_executing, notify, set_address_in_target: set_load_address,
7010 allow_memory_image_last_resort);
7011 }
7012
7013 // We have a ModuleSP to load in the Target. Load it at the
7014 // correct address/slide and notify/load scripting resources.
7015 if (module_sp) {
7016 added_modules.Append(module_sp, notify: false /* notify */);
7017
7018 // We have a list of segment load address
7019 if (image.segment_load_addresses.size() > 0) {
7020 if (log) {
7021 std::string uuidstr = image.uuid.GetAsString();
7022 log->Printf(format: "ObjectFileMachO::LoadCoreFileImages adding binary '%s' "
7023 "UUID %s with section load addresses",
7024 module_sp->GetFileSpec().GetPath().c_str(),
7025 uuidstr.c_str());
7026 }
7027 for (auto name_vmaddr_tuple : image.segment_load_addresses) {
7028 SectionList *sectlist = module_sp->GetObjectFile()->GetSectionList();
7029 if (sectlist) {
7030 SectionSP sect_sp =
7031 sectlist->FindSectionByName(section_dstr: std::get<0>(t&: name_vmaddr_tuple));
7032 if (sect_sp) {
7033 process.GetTarget().SetSectionLoadAddress(
7034 section: sect_sp, load_addr: std::get<1>(t&: name_vmaddr_tuple));
7035 }
7036 }
7037 }
7038 } else {
7039 if (log) {
7040 std::string uuidstr = image.uuid.GetAsString();
7041 log->Printf(format: "ObjectFileMachO::LoadCoreFileImages adding binary '%s' "
7042 "UUID %s with %s 0x%" PRIx64,
7043 module_sp->GetFileSpec().GetPath().c_str(),
7044 uuidstr.c_str(),
7045 value_is_offset ? "slide" : "load address", value);
7046 }
7047 bool changed;
7048 module_sp->SetLoadAddress(target&: process.GetTarget(), value, value_is_offset,
7049 changed);
7050 }
7051 }
7052 }
7053 if (added_modules.GetSize() > 0) {
7054 process.GetTarget().ModulesDidLoad(module_list&: added_modules);
7055 process.Flush();
7056 return true;
7057 }
7058 // Return true if the only binary we found was the platform binary,
7059 // and it was loaded outside the scope of this method.
7060 if (found_platform_binary)
7061 return true;
7062
7063 // No binaries.
7064 return false;
7065}
7066

source code of lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp