1//===-- ObjectFileMachO.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/ScopeExit.h"
10#include "llvm/ADT/StringRef.h"
11
12#include "Plugins/Process/Utility/RegisterContextDarwin_arm.h"
13#include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h"
14#include "Plugins/Process/Utility/RegisterContextDarwin_i386.h"
15#include "Plugins/Process/Utility/RegisterContextDarwin_riscv32.h"
16#include "Plugins/Process/Utility/RegisterContextDarwin_x86_64.h"
17#include "lldb/Core/Debugger.h"
18#include "lldb/Core/Module.h"
19#include "lldb/Core/ModuleSpec.h"
20#include "lldb/Core/PluginManager.h"
21#include "lldb/Core/Progress.h"
22#include "lldb/Core/Section.h"
23#include "lldb/Host/Host.h"
24#include "lldb/Symbol/DWARFCallFrameInfo.h"
25#include "lldb/Symbol/ObjectFile.h"
26#include "lldb/Target/DynamicLoader.h"
27#include "lldb/Target/MemoryRegionInfo.h"
28#include "lldb/Target/Platform.h"
29#include "lldb/Target/Process.h"
30#include "lldb/Target/SectionLoadList.h"
31#include "lldb/Target/Target.h"
32#include "lldb/Target/Thread.h"
33#include "lldb/Target/ThreadList.h"
34#include "lldb/Utility/ArchSpec.h"
35#include "lldb/Utility/DataBuffer.h"
36#include "lldb/Utility/FileSpec.h"
37#include "lldb/Utility/FileSpecList.h"
38#include "lldb/Utility/LLDBLog.h"
39#include "lldb/Utility/Log.h"
40#include "lldb/Utility/RangeMap.h"
41#include "lldb/Utility/RegisterValue.h"
42#include "lldb/Utility/Status.h"
43#include "lldb/Utility/StreamString.h"
44#include "lldb/Utility/Timer.h"
45#include "lldb/Utility/UUID.h"
46
47#include "lldb/Host/SafeMachO.h"
48
49#include "llvm/ADT/DenseSet.h"
50#include "llvm/Support/FormatVariadic.h"
51#include "llvm/Support/MemoryBuffer.h"
52
53#include "ObjectFileMachO.h"
54
55#if defined(__APPLE__)
56#include <TargetConditionals.h>
57// GetLLDBSharedCacheUUID() needs to call dlsym()
58#include <dlfcn.h>
59#include <mach/mach_init.h>
60#include <mach/vm_map.h>
61#include <lldb/Host/SafeMachO.h>
62#endif
63
64#ifndef __APPLE__
65#include "lldb/Utility/AppleUuidCompatibility.h"
66#else
67#include <uuid/uuid.h>
68#endif
69
70#include <bitset>
71#include <memory>
72#include <optional>
73
74// Unfortunately the signpost header pulls in the system MachO header, too.
75#ifdef CPU_TYPE_ARM
76#undef CPU_TYPE_ARM
77#endif
78#ifdef CPU_TYPE_ARM64
79#undef CPU_TYPE_ARM64
80#endif
81#ifdef CPU_TYPE_ARM64_32
82#undef CPU_TYPE_ARM64_32
83#endif
84#ifdef CPU_TYPE_I386
85#undef CPU_TYPE_I386
86#endif
87#ifdef CPU_TYPE_X86_64
88#undef CPU_TYPE_X86_64
89#endif
90#ifdef MH_DYLINKER
91#undef MH_DYLINKER
92#endif
93#ifdef MH_OBJECT
94#undef MH_OBJECT
95#endif
96#ifdef LC_VERSION_MIN_MACOSX
97#undef LC_VERSION_MIN_MACOSX
98#endif
99#ifdef LC_VERSION_MIN_IPHONEOS
100#undef LC_VERSION_MIN_IPHONEOS
101#endif
102#ifdef LC_VERSION_MIN_TVOS
103#undef LC_VERSION_MIN_TVOS
104#endif
105#ifdef LC_VERSION_MIN_WATCHOS
106#undef LC_VERSION_MIN_WATCHOS
107#endif
108#ifdef LC_BUILD_VERSION
109#undef LC_BUILD_VERSION
110#endif
111#ifdef PLATFORM_MACOS
112#undef PLATFORM_MACOS
113#endif
114#ifdef PLATFORM_MACCATALYST
115#undef PLATFORM_MACCATALYST
116#endif
117#ifdef PLATFORM_IOS
118#undef PLATFORM_IOS
119#endif
120#ifdef PLATFORM_IOSSIMULATOR
121#undef PLATFORM_IOSSIMULATOR
122#endif
123#ifdef PLATFORM_TVOS
124#undef PLATFORM_TVOS
125#endif
126#ifdef PLATFORM_TVOSSIMULATOR
127#undef PLATFORM_TVOSSIMULATOR
128#endif
129#ifdef PLATFORM_WATCHOS
130#undef PLATFORM_WATCHOS
131#endif
132#ifdef PLATFORM_WATCHOSSIMULATOR
133#undef PLATFORM_WATCHOSSIMULATOR
134#endif
135
136#define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
137using namespace lldb;
138using namespace lldb_private;
139using namespace llvm::MachO;
140
141static constexpr llvm::StringLiteral g_loader_path = "@loader_path";
142static constexpr llvm::StringLiteral g_executable_path = "@executable_path";
143
144LLDB_PLUGIN_DEFINE(ObjectFileMachO)
145
146static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
147 const char *alt_name, size_t reg_byte_size,
148 Stream &data) {
149 const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(reg_name: name);
150 if (reg_info == nullptr)
151 reg_info = reg_ctx->GetRegisterInfoByName(reg_name: alt_name);
152 if (reg_info) {
153 lldb_private::RegisterValue reg_value;
154 if (reg_ctx->ReadRegister(reg_info, reg_value)) {
155 if (reg_info->byte_size >= reg_byte_size)
156 data.Write(src: reg_value.GetBytes(), src_len: reg_byte_size);
157 else {
158 data.Write(src: reg_value.GetBytes(), src_len: reg_info->byte_size);
159 for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
160 data.PutChar(ch: 0);
161 }
162 return;
163 }
164 }
165 // Just write zeros if all else fails
166 for (size_t i = 0; i < reg_byte_size; ++i)
167 data.PutChar(ch: 0);
168}
169
170class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 {
171public:
172 RegisterContextDarwin_x86_64_Mach(lldb_private::Thread &thread,
173 const DataExtractor &data)
174 : RegisterContextDarwin_x86_64(thread, 0) {
175 SetRegisterDataFrom_LC_THREAD(data);
176 }
177
178 void InvalidateAllRegisters() override {
179 // Do nothing... registers are always valid...
180 }
181
182 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
183 lldb::offset_t offset = 0;
184 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
185 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
186 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
187 bool done = false;
188
189 while (!done) {
190 int flavor = data.GetU32(offset_ptr: &offset);
191 if (flavor == 0)
192 done = true;
193 else {
194 uint32_t i;
195 uint32_t count = data.GetU32(offset_ptr: &offset);
196 switch (flavor) {
197 case GPRRegSet:
198 for (i = 0; i < count; ++i)
199 (&gpr.rax)[i] = data.GetU64(offset_ptr: &offset);
200 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
201 done = true;
202
203 break;
204 case FPURegSet:
205 // TODO: fill in FPU regs....
206 // SetError (FPURegSet, Read, -1);
207 done = true;
208
209 break;
210 case EXCRegSet:
211 exc.trapno = data.GetU32(offset_ptr: &offset);
212 exc.err = data.GetU32(offset_ptr: &offset);
213 exc.faultvaddr = data.GetU64(offset_ptr: &offset);
214 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
215 done = true;
216 break;
217 case 7:
218 case 8:
219 case 9:
220 // fancy flavors that encapsulate of the above flavors...
221 break;
222
223 default:
224 done = true;
225 break;
226 }
227 }
228 }
229 }
230
231 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
232 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
233 if (reg_ctx_sp) {
234 RegisterContext *reg_ctx = reg_ctx_sp.get();
235
236 data.PutHex32(uvalue: GPRRegSet); // Flavor
237 data.PutHex32(uvalue: GPRWordCount);
238 PrintRegisterValue(reg_ctx, name: "rax", alt_name: nullptr, reg_byte_size: 8, data);
239 PrintRegisterValue(reg_ctx, name: "rbx", alt_name: nullptr, reg_byte_size: 8, data);
240 PrintRegisterValue(reg_ctx, name: "rcx", alt_name: nullptr, reg_byte_size: 8, data);
241 PrintRegisterValue(reg_ctx, name: "rdx", alt_name: nullptr, reg_byte_size: 8, data);
242 PrintRegisterValue(reg_ctx, name: "rdi", alt_name: nullptr, reg_byte_size: 8, data);
243 PrintRegisterValue(reg_ctx, name: "rsi", alt_name: nullptr, reg_byte_size: 8, data);
244 PrintRegisterValue(reg_ctx, name: "rbp", alt_name: nullptr, reg_byte_size: 8, data);
245 PrintRegisterValue(reg_ctx, name: "rsp", alt_name: nullptr, reg_byte_size: 8, data);
246 PrintRegisterValue(reg_ctx, name: "r8", alt_name: nullptr, reg_byte_size: 8, data);
247 PrintRegisterValue(reg_ctx, name: "r9", alt_name: nullptr, reg_byte_size: 8, data);
248 PrintRegisterValue(reg_ctx, name: "r10", alt_name: nullptr, reg_byte_size: 8, data);
249 PrintRegisterValue(reg_ctx, name: "r11", alt_name: nullptr, reg_byte_size: 8, data);
250 PrintRegisterValue(reg_ctx, name: "r12", alt_name: nullptr, reg_byte_size: 8, data);
251 PrintRegisterValue(reg_ctx, name: "r13", alt_name: nullptr, reg_byte_size: 8, data);
252 PrintRegisterValue(reg_ctx, name: "r14", alt_name: nullptr, reg_byte_size: 8, data);
253 PrintRegisterValue(reg_ctx, name: "r15", alt_name: nullptr, reg_byte_size: 8, data);
254 PrintRegisterValue(reg_ctx, name: "rip", alt_name: nullptr, reg_byte_size: 8, data);
255 PrintRegisterValue(reg_ctx, name: "rflags", alt_name: nullptr, reg_byte_size: 8, data);
256 PrintRegisterValue(reg_ctx, name: "cs", alt_name: nullptr, reg_byte_size: 8, data);
257 PrintRegisterValue(reg_ctx, name: "fs", alt_name: nullptr, reg_byte_size: 8, data);
258 PrintRegisterValue(reg_ctx, name: "gs", alt_name: nullptr, reg_byte_size: 8, data);
259
260 // // Write out the FPU registers
261 // const size_t fpu_byte_size = sizeof(FPU);
262 // size_t bytes_written = 0;
263 // data.PutHex32 (FPURegSet);
264 // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
265 // bytes_written += data.PutHex32(0); // uint32_t pad[0]
266 // bytes_written += data.PutHex32(0); // uint32_t pad[1]
267 // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
268 // data); // uint16_t fcw; // "fctrl"
269 // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
270 // data); // uint16_t fsw; // "fstat"
271 // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
272 // data); // uint8_t ftw; // "ftag"
273 // bytes_written += data.PutHex8 (0); // uint8_t pad1;
274 // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
275 // data); // uint16_t fop; // "fop"
276 // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
277 // data); // uint32_t ip; // "fioff"
278 // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
279 // data); // uint16_t cs; // "fiseg"
280 // bytes_written += data.PutHex16 (0); // uint16_t pad2;
281 // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
282 // data); // uint32_t dp; // "fooff"
283 // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
284 // data); // uint16_t ds; // "foseg"
285 // bytes_written += data.PutHex16 (0); // uint16_t pad3;
286 // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
287 // data); // uint32_t mxcsr;
288 // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
289 // 4, data);// uint32_t mxcsrmask;
290 // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
291 // sizeof(MMSReg), data);
292 // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
293 // sizeof(MMSReg), data);
294 // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
295 // sizeof(MMSReg), data);
296 // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
297 // sizeof(MMSReg), data);
298 // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
299 // sizeof(MMSReg), data);
300 // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
301 // sizeof(MMSReg), data);
302 // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
303 // sizeof(MMSReg), data);
304 // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
305 // sizeof(MMSReg), data);
306 // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
307 // sizeof(XMMReg), data);
308 // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
309 // sizeof(XMMReg), data);
310 // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
311 // sizeof(XMMReg), data);
312 // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
313 // sizeof(XMMReg), data);
314 // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
315 // sizeof(XMMReg), data);
316 // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
317 // sizeof(XMMReg), data);
318 // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
319 // sizeof(XMMReg), data);
320 // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
321 // sizeof(XMMReg), data);
322 // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
323 // sizeof(XMMReg), data);
324 // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
325 // sizeof(XMMReg), data);
326 // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
327 // sizeof(XMMReg), data);
328 // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
329 // sizeof(XMMReg), data);
330 // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
331 // sizeof(XMMReg), data);
332 // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
333 // sizeof(XMMReg), data);
334 // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
335 // sizeof(XMMReg), data);
336 // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
337 // sizeof(XMMReg), data);
338 //
339 // // Fill rest with zeros
340 // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
341 // i)
342 // data.PutChar(0);
343
344 // Write out the EXC registers
345 data.PutHex32(uvalue: EXCRegSet);
346 data.PutHex32(uvalue: EXCWordCount);
347 PrintRegisterValue(reg_ctx, name: "trapno", alt_name: nullptr, reg_byte_size: 4, data);
348 PrintRegisterValue(reg_ctx, name: "err", alt_name: nullptr, reg_byte_size: 4, data);
349 PrintRegisterValue(reg_ctx, name: "faultvaddr", alt_name: nullptr, reg_byte_size: 8, data);
350 return true;
351 }
352 return false;
353 }
354
355protected:
356 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
357
358 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
359
360 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
361
362 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
363 return 0;
364 }
365
366 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
367 return 0;
368 }
369
370 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
371 return 0;
372 }
373};
374
375class RegisterContextDarwin_i386_Mach : public RegisterContextDarwin_i386 {
376public:
377 RegisterContextDarwin_i386_Mach(lldb_private::Thread &thread,
378 const DataExtractor &data)
379 : RegisterContextDarwin_i386(thread, 0) {
380 SetRegisterDataFrom_LC_THREAD(data);
381 }
382
383 void InvalidateAllRegisters() override {
384 // Do nothing... registers are always valid...
385 }
386
387 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
388 lldb::offset_t offset = 0;
389 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
390 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
391 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
392 bool done = false;
393
394 while (!done) {
395 int flavor = data.GetU32(offset_ptr: &offset);
396 if (flavor == 0)
397 done = true;
398 else {
399 uint32_t i;
400 uint32_t count = data.GetU32(offset_ptr: &offset);
401 switch (flavor) {
402 case GPRRegSet:
403 for (i = 0; i < count; ++i)
404 (&gpr.eax)[i] = data.GetU32(offset_ptr: &offset);
405 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
406 done = true;
407
408 break;
409 case FPURegSet:
410 // TODO: fill in FPU regs....
411 // SetError (FPURegSet, Read, -1);
412 done = true;
413
414 break;
415 case EXCRegSet:
416 exc.trapno = data.GetU32(offset_ptr: &offset);
417 exc.err = data.GetU32(offset_ptr: &offset);
418 exc.faultvaddr = data.GetU32(offset_ptr: &offset);
419 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
420 done = true;
421 break;
422 case 7:
423 case 8:
424 case 9:
425 // fancy flavors that encapsulate of the above flavors...
426 break;
427
428 default:
429 done = true;
430 break;
431 }
432 }
433 }
434 }
435
436 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
437 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
438 if (reg_ctx_sp) {
439 RegisterContext *reg_ctx = reg_ctx_sp.get();
440
441 data.PutHex32(uvalue: GPRRegSet); // Flavor
442 data.PutHex32(uvalue: GPRWordCount);
443 PrintRegisterValue(reg_ctx, name: "eax", alt_name: nullptr, reg_byte_size: 4, data);
444 PrintRegisterValue(reg_ctx, name: "ebx", alt_name: nullptr, reg_byte_size: 4, data);
445 PrintRegisterValue(reg_ctx, name: "ecx", alt_name: nullptr, reg_byte_size: 4, data);
446 PrintRegisterValue(reg_ctx, name: "edx", alt_name: nullptr, reg_byte_size: 4, data);
447 PrintRegisterValue(reg_ctx, name: "edi", alt_name: nullptr, reg_byte_size: 4, data);
448 PrintRegisterValue(reg_ctx, name: "esi", alt_name: nullptr, reg_byte_size: 4, data);
449 PrintRegisterValue(reg_ctx, name: "ebp", alt_name: nullptr, reg_byte_size: 4, data);
450 PrintRegisterValue(reg_ctx, name: "esp", alt_name: nullptr, reg_byte_size: 4, data);
451 PrintRegisterValue(reg_ctx, name: "ss", alt_name: nullptr, reg_byte_size: 4, data);
452 PrintRegisterValue(reg_ctx, name: "eflags", alt_name: nullptr, reg_byte_size: 4, data);
453 PrintRegisterValue(reg_ctx, name: "eip", alt_name: nullptr, reg_byte_size: 4, data);
454 PrintRegisterValue(reg_ctx, name: "cs", alt_name: nullptr, reg_byte_size: 4, data);
455 PrintRegisterValue(reg_ctx, name: "ds", alt_name: nullptr, reg_byte_size: 4, data);
456 PrintRegisterValue(reg_ctx, name: "es", alt_name: nullptr, reg_byte_size: 4, data);
457 PrintRegisterValue(reg_ctx, name: "fs", alt_name: nullptr, reg_byte_size: 4, data);
458 PrintRegisterValue(reg_ctx, name: "gs", alt_name: nullptr, reg_byte_size: 4, data);
459
460 // Write out the EXC registers
461 data.PutHex32(uvalue: EXCRegSet);
462 data.PutHex32(uvalue: EXCWordCount);
463 PrintRegisterValue(reg_ctx, name: "trapno", alt_name: nullptr, reg_byte_size: 4, data);
464 PrintRegisterValue(reg_ctx, name: "err", alt_name: nullptr, reg_byte_size: 4, data);
465 PrintRegisterValue(reg_ctx, name: "faultvaddr", alt_name: nullptr, reg_byte_size: 4, data);
466 return true;
467 }
468 return false;
469 }
470
471protected:
472 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
473
474 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
475
476 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
477
478 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
479 return 0;
480 }
481
482 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
483 return 0;
484 }
485
486 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
487 return 0;
488 }
489};
490
491class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm {
492public:
493 RegisterContextDarwin_arm_Mach(lldb_private::Thread &thread,
494 const DataExtractor &data)
495 : RegisterContextDarwin_arm(thread, 0) {
496 SetRegisterDataFrom_LC_THREAD(data);
497 }
498
499 void InvalidateAllRegisters() override {
500 // Do nothing... registers are always valid...
501 }
502
503 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
504 lldb::offset_t offset = 0;
505 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
506 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
507 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
508 bool done = false;
509
510 while (!done) {
511 int flavor = data.GetU32(offset_ptr: &offset);
512 uint32_t count = data.GetU32(offset_ptr: &offset);
513 lldb::offset_t next_thread_state = offset + (count * 4);
514 switch (flavor) {
515 case GPRAltRegSet:
516 case GPRRegSet: {
517 // r0-r15, plus CPSR
518 uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1;
519 if (count == gpr_buf_count) {
520 for (uint32_t i = 0; i < (count - 1); ++i) {
521 gpr.r[i] = data.GetU32(offset_ptr: &offset);
522 }
523 gpr.cpsr = data.GetU32(offset_ptr: &offset);
524
525 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
526 }
527 }
528 offset = next_thread_state;
529 break;
530
531 case FPURegSet: {
532 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats;
533 const int fpu_reg_buf_size = sizeof(fpu.floats);
534 if (data.ExtractBytes(offset, length: fpu_reg_buf_size, dst_byte_order: eByteOrderLittle,
535 dst: fpu_reg_buf) == fpu_reg_buf_size) {
536 offset += fpu_reg_buf_size;
537 fpu.fpscr = data.GetU32(offset_ptr: &offset);
538 SetError(flavor: FPURegSet, err_idx: Read, err: 0);
539 } else {
540 done = true;
541 }
542 }
543 offset = next_thread_state;
544 break;
545
546 case EXCRegSet:
547 if (count == 3) {
548 exc.exception = data.GetU32(offset_ptr: &offset);
549 exc.fsr = data.GetU32(offset_ptr: &offset);
550 exc.far = data.GetU32(offset_ptr: &offset);
551 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
552 }
553 done = true;
554 offset = next_thread_state;
555 break;
556
557 // Unknown register set flavor, stop trying to parse.
558 default:
559 done = true;
560 }
561 }
562 }
563
564 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
565 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
566 if (reg_ctx_sp) {
567 RegisterContext *reg_ctx = reg_ctx_sp.get();
568
569 data.PutHex32(uvalue: GPRRegSet); // Flavor
570 data.PutHex32(uvalue: GPRWordCount);
571 PrintRegisterValue(reg_ctx, name: "r0", alt_name: nullptr, reg_byte_size: 4, data);
572 PrintRegisterValue(reg_ctx, name: "r1", alt_name: nullptr, reg_byte_size: 4, data);
573 PrintRegisterValue(reg_ctx, name: "r2", alt_name: nullptr, reg_byte_size: 4, data);
574 PrintRegisterValue(reg_ctx, name: "r3", alt_name: nullptr, reg_byte_size: 4, data);
575 PrintRegisterValue(reg_ctx, name: "r4", alt_name: nullptr, reg_byte_size: 4, data);
576 PrintRegisterValue(reg_ctx, name: "r5", alt_name: nullptr, reg_byte_size: 4, data);
577 PrintRegisterValue(reg_ctx, name: "r6", alt_name: nullptr, reg_byte_size: 4, data);
578 PrintRegisterValue(reg_ctx, name: "r7", alt_name: nullptr, reg_byte_size: 4, data);
579 PrintRegisterValue(reg_ctx, name: "r8", alt_name: nullptr, reg_byte_size: 4, data);
580 PrintRegisterValue(reg_ctx, name: "r9", alt_name: nullptr, reg_byte_size: 4, data);
581 PrintRegisterValue(reg_ctx, name: "r10", alt_name: nullptr, reg_byte_size: 4, data);
582 PrintRegisterValue(reg_ctx, name: "r11", alt_name: nullptr, reg_byte_size: 4, data);
583 PrintRegisterValue(reg_ctx, name: "r12", alt_name: nullptr, reg_byte_size: 4, data);
584 PrintRegisterValue(reg_ctx, name: "sp", alt_name: nullptr, reg_byte_size: 4, data);
585 PrintRegisterValue(reg_ctx, name: "lr", alt_name: nullptr, reg_byte_size: 4, data);
586 PrintRegisterValue(reg_ctx, name: "pc", alt_name: nullptr, reg_byte_size: 4, data);
587 PrintRegisterValue(reg_ctx, name: "cpsr", alt_name: nullptr, reg_byte_size: 4, data);
588
589 // Write out the EXC registers
590 // data.PutHex32 (EXCRegSet);
591 // data.PutHex32 (EXCWordCount);
592 // WriteRegister (reg_ctx, "exception", NULL, 4, data);
593 // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
594 // WriteRegister (reg_ctx, "far", NULL, 4, data);
595 return true;
596 }
597 return false;
598 }
599
600protected:
601 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
602
603 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
604
605 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
606
607 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
608
609 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
610 return 0;
611 }
612
613 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
614 return 0;
615 }
616
617 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
618 return 0;
619 }
620
621 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
622 return -1;
623 }
624};
625
626class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 {
627public:
628 RegisterContextDarwin_arm64_Mach(lldb_private::Thread &thread,
629 const DataExtractor &data)
630 : RegisterContextDarwin_arm64(thread, 0) {
631 SetRegisterDataFrom_LC_THREAD(data);
632 }
633
634 void InvalidateAllRegisters() override {
635 // Do nothing... registers are always valid...
636 }
637
638 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
639 lldb::offset_t offset = 0;
640 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
641 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
642 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
643 bool done = false;
644 while (!done) {
645 int flavor = data.GetU32(offset_ptr: &offset);
646 uint32_t count = data.GetU32(offset_ptr: &offset);
647 lldb::offset_t next_thread_state = offset + (count * 4);
648 switch (flavor) {
649 case GPRRegSet:
650 // x0-x29 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
651 // 32-bit register)
652 if (count >= (33 * 2) + 1) {
653 for (uint32_t i = 0; i < 29; ++i)
654 gpr.x[i] = data.GetU64(offset_ptr: &offset);
655 gpr.fp = data.GetU64(offset_ptr: &offset);
656 gpr.lr = data.GetU64(offset_ptr: &offset);
657 gpr.sp = data.GetU64(offset_ptr: &offset);
658 gpr.pc = data.GetU64(offset_ptr: &offset);
659 gpr.cpsr = data.GetU32(offset_ptr: &offset);
660 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
661 }
662 offset = next_thread_state;
663 break;
664 case FPURegSet: {
665 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
666 const int fpu_reg_buf_size = sizeof(fpu);
667 if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
668 data.ExtractBytes(offset, length: fpu_reg_buf_size, dst_byte_order: eByteOrderLittle,
669 dst: fpu_reg_buf) == fpu_reg_buf_size) {
670 SetError(flavor: FPURegSet, err_idx: Read, err: 0);
671 } else {
672 done = true;
673 }
674 }
675 offset = next_thread_state;
676 break;
677 case EXCRegSet:
678 if (count == 4) {
679 exc.far = data.GetU64(offset_ptr: &offset);
680 exc.esr = data.GetU32(offset_ptr: &offset);
681 exc.exception = data.GetU32(offset_ptr: &offset);
682 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
683 }
684 offset = next_thread_state;
685 break;
686 default:
687 done = true;
688 break;
689 }
690 }
691 }
692
693 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
694 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
695 if (reg_ctx_sp) {
696 RegisterContext *reg_ctx = reg_ctx_sp.get();
697
698 data.PutHex32(uvalue: GPRRegSet); // Flavor
699 data.PutHex32(uvalue: GPRWordCount);
700 PrintRegisterValue(reg_ctx, name: "x0", alt_name: nullptr, reg_byte_size: 8, data);
701 PrintRegisterValue(reg_ctx, name: "x1", alt_name: nullptr, reg_byte_size: 8, data);
702 PrintRegisterValue(reg_ctx, name: "x2", alt_name: nullptr, reg_byte_size: 8, data);
703 PrintRegisterValue(reg_ctx, name: "x3", alt_name: nullptr, reg_byte_size: 8, data);
704 PrintRegisterValue(reg_ctx, name: "x4", alt_name: nullptr, reg_byte_size: 8, data);
705 PrintRegisterValue(reg_ctx, name: "x5", alt_name: nullptr, reg_byte_size: 8, data);
706 PrintRegisterValue(reg_ctx, name: "x6", alt_name: nullptr, reg_byte_size: 8, data);
707 PrintRegisterValue(reg_ctx, name: "x7", alt_name: nullptr, reg_byte_size: 8, data);
708 PrintRegisterValue(reg_ctx, name: "x8", alt_name: nullptr, reg_byte_size: 8, data);
709 PrintRegisterValue(reg_ctx, name: "x9", alt_name: nullptr, reg_byte_size: 8, data);
710 PrintRegisterValue(reg_ctx, name: "x10", alt_name: nullptr, reg_byte_size: 8, data);
711 PrintRegisterValue(reg_ctx, name: "x11", alt_name: nullptr, reg_byte_size: 8, data);
712 PrintRegisterValue(reg_ctx, name: "x12", alt_name: nullptr, reg_byte_size: 8, data);
713 PrintRegisterValue(reg_ctx, name: "x13", alt_name: nullptr, reg_byte_size: 8, data);
714 PrintRegisterValue(reg_ctx, name: "x14", alt_name: nullptr, reg_byte_size: 8, data);
715 PrintRegisterValue(reg_ctx, name: "x15", alt_name: nullptr, reg_byte_size: 8, data);
716 PrintRegisterValue(reg_ctx, name: "x16", alt_name: nullptr, reg_byte_size: 8, data);
717 PrintRegisterValue(reg_ctx, name: "x17", alt_name: nullptr, reg_byte_size: 8, data);
718 PrintRegisterValue(reg_ctx, name: "x18", alt_name: nullptr, reg_byte_size: 8, data);
719 PrintRegisterValue(reg_ctx, name: "x19", alt_name: nullptr, reg_byte_size: 8, data);
720 PrintRegisterValue(reg_ctx, name: "x20", alt_name: nullptr, reg_byte_size: 8, data);
721 PrintRegisterValue(reg_ctx, name: "x21", alt_name: nullptr, reg_byte_size: 8, data);
722 PrintRegisterValue(reg_ctx, name: "x22", alt_name: nullptr, reg_byte_size: 8, data);
723 PrintRegisterValue(reg_ctx, name: "x23", alt_name: nullptr, reg_byte_size: 8, data);
724 PrintRegisterValue(reg_ctx, name: "x24", alt_name: nullptr, reg_byte_size: 8, data);
725 PrintRegisterValue(reg_ctx, name: "x25", alt_name: nullptr, reg_byte_size: 8, data);
726 PrintRegisterValue(reg_ctx, name: "x26", alt_name: nullptr, reg_byte_size: 8, data);
727 PrintRegisterValue(reg_ctx, name: "x27", alt_name: nullptr, reg_byte_size: 8, data);
728 PrintRegisterValue(reg_ctx, name: "x28", alt_name: nullptr, reg_byte_size: 8, data);
729 PrintRegisterValue(reg_ctx, name: "fp", alt_name: nullptr, reg_byte_size: 8, data);
730 PrintRegisterValue(reg_ctx, name: "lr", alt_name: nullptr, reg_byte_size: 8, data);
731 PrintRegisterValue(reg_ctx, name: "sp", alt_name: nullptr, reg_byte_size: 8, data);
732 PrintRegisterValue(reg_ctx, name: "pc", alt_name: nullptr, reg_byte_size: 8, data);
733 PrintRegisterValue(reg_ctx, name: "cpsr", alt_name: nullptr, reg_byte_size: 4, data);
734 data.PutHex32(uvalue: 0); // uint32_t pad at the end
735
736 // Write out the EXC registers
737 data.PutHex32(uvalue: EXCRegSet);
738 data.PutHex32(uvalue: EXCWordCount);
739 PrintRegisterValue(reg_ctx, name: "far", alt_name: nullptr, reg_byte_size: 8, data);
740 PrintRegisterValue(reg_ctx, name: "esr", alt_name: nullptr, reg_byte_size: 4, data);
741 PrintRegisterValue(reg_ctx, name: "exception", alt_name: nullptr, reg_byte_size: 4, data);
742 return true;
743 }
744 return false;
745 }
746
747protected:
748 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
749
750 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
751
752 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
753
754 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
755
756 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
757 return 0;
758 }
759
760 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
761 return 0;
762 }
763
764 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
765 return 0;
766 }
767
768 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
769 return -1;
770 }
771};
772
773class RegisterContextDarwin_riscv32_Mach
774 : public RegisterContextDarwin_riscv32 {
775public:
776 RegisterContextDarwin_riscv32_Mach(lldb_private::Thread &thread,
777 const DataExtractor &data)
778 : RegisterContextDarwin_riscv32(thread, 0) {
779 SetRegisterDataFrom_LC_THREAD(data);
780 }
781
782 void InvalidateAllRegisters() override {
783 // Do nothing... registers are always valid...
784 }
785
786 void SetRegisterDataFrom_LC_THREAD(const DataExtractor &data) {
787 lldb::offset_t offset = 0;
788 SetError(flavor: GPRRegSet, err_idx: Read, err: -1);
789 SetError(flavor: FPURegSet, err_idx: Read, err: -1);
790 SetError(flavor: EXCRegSet, err_idx: Read, err: -1);
791 SetError(flavor: CSRRegSet, err_idx: Read, err: -1);
792 bool done = false;
793 while (!done) {
794 int flavor = data.GetU32(offset_ptr: &offset);
795 uint32_t count = data.GetU32(offset_ptr: &offset);
796 lldb::offset_t next_thread_state = offset + (count * 4);
797 switch (flavor) {
798 case GPRRegSet:
799 // x0-x31 + pc
800 if (count >= 32) {
801 for (uint32_t i = 0; i < 32; ++i)
802 ((uint32_t *)&gpr.x0)[i] = data.GetU32(offset_ptr: &offset);
803 gpr.pc = data.GetU32(offset_ptr: &offset);
804 SetError(flavor: GPRRegSet, err_idx: Read, err: 0);
805 }
806 offset = next_thread_state;
807 break;
808 case FPURegSet: {
809 // f0-f31 + fcsr
810 if (count >= 32) {
811 for (uint32_t i = 0; i < 32; ++i)
812 ((uint32_t *)&fpr.f0)[i] = data.GetU32(offset_ptr: &offset);
813 fpr.fcsr = data.GetU32(offset_ptr: &offset);
814 SetError(flavor: FPURegSet, err_idx: Read, err: 0);
815 }
816 }
817 offset = next_thread_state;
818 break;
819 case EXCRegSet:
820 if (count == 3) {
821 exc.exception = data.GetU32(offset_ptr: &offset);
822 exc.fsr = data.GetU32(offset_ptr: &offset);
823 exc.far = data.GetU32(offset_ptr: &offset);
824 SetError(flavor: EXCRegSet, err_idx: Read, err: 0);
825 }
826 offset = next_thread_state;
827 break;
828 default:
829 done = true;
830 break;
831 }
832 }
833 }
834
835 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
836 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
837 if (reg_ctx_sp) {
838 RegisterContext *reg_ctx = reg_ctx_sp.get();
839
840 data.PutHex32(uvalue: GPRRegSet); // Flavor
841 data.PutHex32(uvalue: GPRWordCount);
842 PrintRegisterValue(reg_ctx, name: "x0", alt_name: nullptr, reg_byte_size: 4, data);
843 PrintRegisterValue(reg_ctx, name: "x1", alt_name: nullptr, reg_byte_size: 4, data);
844 PrintRegisterValue(reg_ctx, name: "x2", alt_name: nullptr, reg_byte_size: 4, data);
845 PrintRegisterValue(reg_ctx, name: "x3", alt_name: nullptr, reg_byte_size: 4, data);
846 PrintRegisterValue(reg_ctx, name: "x4", alt_name: nullptr, reg_byte_size: 4, data);
847 PrintRegisterValue(reg_ctx, name: "x5", alt_name: nullptr, reg_byte_size: 4, data);
848 PrintRegisterValue(reg_ctx, name: "x6", alt_name: nullptr, reg_byte_size: 4, data);
849 PrintRegisterValue(reg_ctx, name: "x7", alt_name: nullptr, reg_byte_size: 4, data);
850 PrintRegisterValue(reg_ctx, name: "x8", alt_name: nullptr, reg_byte_size: 4, data);
851 PrintRegisterValue(reg_ctx, name: "x9", alt_name: nullptr, reg_byte_size: 4, data);
852 PrintRegisterValue(reg_ctx, name: "x10", alt_name: nullptr, reg_byte_size: 4, data);
853 PrintRegisterValue(reg_ctx, name: "x11", alt_name: nullptr, reg_byte_size: 4, data);
854 PrintRegisterValue(reg_ctx, name: "x12", alt_name: nullptr, reg_byte_size: 4, data);
855 PrintRegisterValue(reg_ctx, name: "x13", alt_name: nullptr, reg_byte_size: 4, data);
856 PrintRegisterValue(reg_ctx, name: "x14", alt_name: nullptr, reg_byte_size: 4, data);
857 PrintRegisterValue(reg_ctx, name: "x15", alt_name: nullptr, reg_byte_size: 4, data);
858 PrintRegisterValue(reg_ctx, name: "x16", alt_name: nullptr, reg_byte_size: 4, data);
859 PrintRegisterValue(reg_ctx, name: "x17", alt_name: nullptr, reg_byte_size: 4, data);
860 PrintRegisterValue(reg_ctx, name: "x18", alt_name: nullptr, reg_byte_size: 4, data);
861 PrintRegisterValue(reg_ctx, name: "x19", alt_name: nullptr, reg_byte_size: 4, data);
862 PrintRegisterValue(reg_ctx, name: "x20", alt_name: nullptr, reg_byte_size: 4, data);
863 PrintRegisterValue(reg_ctx, name: "x21", alt_name: nullptr, reg_byte_size: 4, data);
864 PrintRegisterValue(reg_ctx, name: "x22", alt_name: nullptr, reg_byte_size: 4, data);
865 PrintRegisterValue(reg_ctx, name: "x23", alt_name: nullptr, reg_byte_size: 4, data);
866 PrintRegisterValue(reg_ctx, name: "x24", alt_name: nullptr, reg_byte_size: 4, data);
867 PrintRegisterValue(reg_ctx, name: "x25", alt_name: nullptr, reg_byte_size: 4, data);
868 PrintRegisterValue(reg_ctx, name: "x26", alt_name: nullptr, reg_byte_size: 4, data);
869 PrintRegisterValue(reg_ctx, name: "x27", alt_name: nullptr, reg_byte_size: 4, data);
870 PrintRegisterValue(reg_ctx, name: "x28", alt_name: nullptr, reg_byte_size: 4, data);
871 PrintRegisterValue(reg_ctx, name: "x29", alt_name: nullptr, reg_byte_size: 4, data);
872 PrintRegisterValue(reg_ctx, name: "x30", alt_name: nullptr, reg_byte_size: 4, data);
873 PrintRegisterValue(reg_ctx, name: "x31", alt_name: nullptr, reg_byte_size: 4, data);
874 PrintRegisterValue(reg_ctx, name: "pc", alt_name: nullptr, reg_byte_size: 4, data);
875 data.PutHex32(uvalue: 0); // uint32_t pad at the end
876
877 // Write out the EXC registers
878 data.PutHex32(uvalue: EXCRegSet);
879 data.PutHex32(uvalue: EXCWordCount);
880 PrintRegisterValue(reg_ctx, name: "exception", alt_name: nullptr, reg_byte_size: 4, data);
881 PrintRegisterValue(reg_ctx, name: "fsr", alt_name: nullptr, reg_byte_size: 4, data);
882 PrintRegisterValue(reg_ctx, name: "far", alt_name: nullptr, reg_byte_size: 4, data);
883 return true;
884 }
885 return false;
886 }
887
888protected:
889 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
890
891 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
892
893 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
894
895 int DoReadCSR(lldb::tid_t tid, int flavor, CSR &csr) override { return -1; }
896
897 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
898 return 0;
899 }
900
901 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
902 return 0;
903 }
904
905 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
906 return 0;
907 }
908
909 int DoWriteCSR(lldb::tid_t tid, int flavor, const CSR &csr) override {
910 return 0;
911 }
912};
913
914static uint32_t MachHeaderSizeFromMagic(uint32_t magic) {
915 switch (magic) {
916 case MH_MAGIC:
917 case MH_CIGAM:
918 return sizeof(struct llvm::MachO::mach_header);
919
920 case MH_MAGIC_64:
921 case MH_CIGAM_64:
922 return sizeof(struct llvm::MachO::mach_header_64);
923 break;
924
925 default:
926 break;
927 }
928 return 0;
929}
930
931#define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
932
933char ObjectFileMachO::ID;
934
935void ObjectFileMachO::Initialize() {
936 PluginManager::RegisterPlugin(
937 name: GetPluginNameStatic(), description: GetPluginDescriptionStatic(), create_callback: CreateInstance,
938 create_memory_callback: CreateMemoryInstance, get_module_specifications: GetModuleSpecifications, save_core: SaveCore);
939}
940
941void ObjectFileMachO::Terminate() {
942 PluginManager::UnregisterPlugin(create_callback: CreateInstance);
943}
944
945ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
946 DataBufferSP data_sp,
947 lldb::offset_t data_offset,
948 const FileSpec *file,
949 lldb::offset_t file_offset,
950 lldb::offset_t length) {
951 if (!data_sp) {
952 data_sp = MapFileData(file: *file, Size: length, Offset: file_offset);
953 if (!data_sp)
954 return nullptr;
955 data_offset = 0;
956 }
957
958 if (!ObjectFileMachO::MagicBytesMatch(data_sp, offset: data_offset, length))
959 return nullptr;
960
961 // Update the data to contain the entire file if it doesn't already
962 if (data_sp->GetByteSize() < length) {
963 data_sp = MapFileData(file: *file, Size: length, Offset: file_offset);
964 if (!data_sp)
965 return nullptr;
966 data_offset = 0;
967 }
968 auto objfile_up = std::make_unique<ObjectFileMachO>(
969 args: module_sp, args&: data_sp, args&: data_offset, args&: file, args&: file_offset, args&: length);
970 if (!objfile_up || !objfile_up->ParseHeader())
971 return nullptr;
972
973 return objfile_up.release();
974}
975
976ObjectFile *ObjectFileMachO::CreateMemoryInstance(
977 const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
978 const ProcessSP &process_sp, lldb::addr_t header_addr) {
979 if (ObjectFileMachO::MagicBytesMatch(data_sp, offset: 0, length: data_sp->GetByteSize())) {
980 std::unique_ptr<ObjectFile> objfile_up(
981 new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
982 if (objfile_up.get() && objfile_up->ParseHeader())
983 return objfile_up.release();
984 }
985 return nullptr;
986}
987
988size_t ObjectFileMachO::GetModuleSpecifications(
989 const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
990 lldb::offset_t data_offset, lldb::offset_t file_offset,
991 lldb::offset_t length, lldb_private::ModuleSpecList &specs) {
992 const size_t initial_count = specs.GetSize();
993
994 if (ObjectFileMachO::MagicBytesMatch(data_sp, offset: 0, length: data_sp->GetByteSize())) {
995 DataExtractor data;
996 data.SetData(data_sp);
997 llvm::MachO::mach_header header;
998 if (ParseHeader(data, data_offset_ptr: &data_offset, header)) {
999 size_t header_and_load_cmds =
1000 header.sizeofcmds + MachHeaderSizeFromMagic(magic: header.magic);
1001 if (header_and_load_cmds >= data_sp->GetByteSize()) {
1002 data_sp = MapFileData(file, Size: header_and_load_cmds, Offset: file_offset);
1003 data.SetData(data_sp);
1004 data_offset = MachHeaderSizeFromMagic(magic: header.magic);
1005 }
1006 if (data_sp) {
1007 ModuleSpec base_spec;
1008 base_spec.GetFileSpec() = file;
1009 base_spec.SetObjectOffset(file_offset);
1010 base_spec.SetObjectSize(length);
1011 GetAllArchSpecs(header, data, lc_offset: data_offset, base_spec, all_specs&: specs);
1012 }
1013 }
1014 }
1015 return specs.GetSize() - initial_count;
1016}
1017
1018ConstString ObjectFileMachO::GetSegmentNameTEXT() {
1019 static ConstString g_segment_name_TEXT("__TEXT");
1020 return g_segment_name_TEXT;
1021}
1022
1023ConstString ObjectFileMachO::GetSegmentNameDATA() {
1024 static ConstString g_segment_name_DATA("__DATA");
1025 return g_segment_name_DATA;
1026}
1027
1028ConstString ObjectFileMachO::GetSegmentNameDATA_DIRTY() {
1029 static ConstString g_segment_name("__DATA_DIRTY");
1030 return g_segment_name;
1031}
1032
1033ConstString ObjectFileMachO::GetSegmentNameDATA_CONST() {
1034 static ConstString g_segment_name("__DATA_CONST");
1035 return g_segment_name;
1036}
1037
1038ConstString ObjectFileMachO::GetSegmentNameOBJC() {
1039 static ConstString g_segment_name_OBJC("__OBJC");
1040 return g_segment_name_OBJC;
1041}
1042
1043ConstString ObjectFileMachO::GetSegmentNameLINKEDIT() {
1044 static ConstString g_section_name_LINKEDIT("__LINKEDIT");
1045 return g_section_name_LINKEDIT;
1046}
1047
1048ConstString ObjectFileMachO::GetSegmentNameDWARF() {
1049 static ConstString g_section_name("__DWARF");
1050 return g_section_name;
1051}
1052
1053ConstString ObjectFileMachO::GetSegmentNameLLVM_COV() {
1054 static ConstString g_section_name("__LLVM_COV");
1055 return g_section_name;
1056}
1057
1058ConstString ObjectFileMachO::GetSectionNameEHFrame() {
1059 static ConstString g_section_name_eh_frame("__eh_frame");
1060 return g_section_name_eh_frame;
1061}
1062
1063ConstString ObjectFileMachO::GetSectionNameLLDBNoNlist() {
1064 static ConstString g_section_name_lldb_no_nlist("__lldb_no_nlist");
1065 return g_section_name_lldb_no_nlist;
1066}
1067
1068bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp,
1069 lldb::addr_t data_offset,
1070 lldb::addr_t data_length) {
1071 DataExtractor data;
1072 data.SetData(data_sp, offset: data_offset, length: data_length);
1073 lldb::offset_t offset = 0;
1074 uint32_t magic = data.GetU32(offset_ptr: &offset);
1075
1076 offset += 4; // cputype
1077 offset += 4; // cpusubtype
1078 uint32_t filetype = data.GetU32(offset_ptr: &offset);
1079
1080 // A fileset has a Mach-O header but is not an
1081 // individual file and must be handled via an
1082 // ObjectContainer plugin.
1083 if (filetype == llvm::MachO::MH_FILESET)
1084 return false;
1085
1086 return MachHeaderSizeFromMagic(magic) != 0;
1087}
1088
1089ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
1090 DataBufferSP data_sp,
1091 lldb::offset_t data_offset,
1092 const FileSpec *file,
1093 lldb::offset_t file_offset,
1094 lldb::offset_t length)
1095 : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
1096 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
1097 m_thread_context_offsets_valid(false), m_reexported_dylibs(),
1098 m_allow_assembly_emulation_unwind_plans(true) {
1099 ::memset(s: &m_header, c: 0, n: sizeof(m_header));
1100 ::memset(s: &m_dysymtab, c: 0, n: sizeof(m_dysymtab));
1101}
1102
1103ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
1104 lldb::WritableDataBufferSP header_data_sp,
1105 const lldb::ProcessSP &process_sp,
1106 lldb::addr_t header_addr)
1107 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
1108 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
1109 m_thread_context_offsets_valid(false), m_reexported_dylibs(),
1110 m_allow_assembly_emulation_unwind_plans(true) {
1111 ::memset(s: &m_header, c: 0, n: sizeof(m_header));
1112 ::memset(s: &m_dysymtab, c: 0, n: sizeof(m_dysymtab));
1113}
1114
1115bool ObjectFileMachO::ParseHeader(DataExtractor &data,
1116 lldb::offset_t *data_offset_ptr,
1117 llvm::MachO::mach_header &header) {
1118 data.SetByteOrder(endian::InlHostByteOrder());
1119 // Leave magic in the original byte order
1120 header.magic = data.GetU32(offset_ptr: data_offset_ptr);
1121 bool can_parse = false;
1122 bool is_64_bit = false;
1123 switch (header.magic) {
1124 case MH_MAGIC:
1125 data.SetByteOrder(endian::InlHostByteOrder());
1126 data.SetAddressByteSize(4);
1127 can_parse = true;
1128 break;
1129
1130 case MH_MAGIC_64:
1131 data.SetByteOrder(endian::InlHostByteOrder());
1132 data.SetAddressByteSize(8);
1133 can_parse = true;
1134 is_64_bit = true;
1135 break;
1136
1137 case MH_CIGAM:
1138 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1139 ? eByteOrderLittle
1140 : eByteOrderBig);
1141 data.SetAddressByteSize(4);
1142 can_parse = true;
1143 break;
1144
1145 case MH_CIGAM_64:
1146 data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1147 ? eByteOrderLittle
1148 : eByteOrderBig);
1149 data.SetAddressByteSize(8);
1150 is_64_bit = true;
1151 can_parse = true;
1152 break;
1153
1154 default:
1155 break;
1156 }
1157
1158 if (can_parse) {
1159 data.GetU32(offset_ptr: data_offset_ptr, dst: &header.cputype, count: 6);
1160 if (is_64_bit)
1161 *data_offset_ptr += 4;
1162 return true;
1163 } else {
1164 memset(s: &header, c: 0, n: sizeof(header));
1165 }
1166 return false;
1167}
1168
1169bool ObjectFileMachO::ParseHeader() {
1170 ModuleSP module_sp(GetModule());
1171 if (!module_sp)
1172 return false;
1173
1174 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1175 bool can_parse = false;
1176 lldb::offset_t offset = 0;
1177 m_data.SetByteOrder(endian::InlHostByteOrder());
1178 // Leave magic in the original byte order
1179 m_header.magic = m_data.GetU32(offset_ptr: &offset);
1180 switch (m_header.magic) {
1181 case MH_MAGIC:
1182 m_data.SetByteOrder(endian::InlHostByteOrder());
1183 m_data.SetAddressByteSize(4);
1184 can_parse = true;
1185 break;
1186
1187 case MH_MAGIC_64:
1188 m_data.SetByteOrder(endian::InlHostByteOrder());
1189 m_data.SetAddressByteSize(8);
1190 can_parse = true;
1191 break;
1192
1193 case MH_CIGAM:
1194 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1195 ? eByteOrderLittle
1196 : eByteOrderBig);
1197 m_data.SetAddressByteSize(4);
1198 can_parse = true;
1199 break;
1200
1201 case MH_CIGAM_64:
1202 m_data.SetByteOrder(endian::InlHostByteOrder() == eByteOrderBig
1203 ? eByteOrderLittle
1204 : eByteOrderBig);
1205 m_data.SetAddressByteSize(8);
1206 can_parse = true;
1207 break;
1208
1209 default:
1210 break;
1211 }
1212
1213 if (can_parse) {
1214 m_data.GetU32(offset_ptr: &offset, dst: &m_header.cputype, count: 6);
1215
1216 ModuleSpecList all_specs;
1217 ModuleSpec base_spec;
1218 GetAllArchSpecs(header: m_header, data: m_data, lc_offset: MachHeaderSizeFromMagic(magic: m_header.magic),
1219 base_spec, all_specs);
1220
1221 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1222 ArchSpec mach_arch =
1223 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture();
1224
1225 // Check if the module has a required architecture
1226 const ArchSpec &module_arch = module_sp->GetArchitecture();
1227 if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(rhs: mach_arch))
1228 continue;
1229
1230 if (SetModulesArchitecture(mach_arch)) {
1231 const size_t header_and_lc_size =
1232 m_header.sizeofcmds + MachHeaderSizeFromMagic(magic: m_header.magic);
1233 if (m_data.GetByteSize() < header_and_lc_size) {
1234 DataBufferSP data_sp;
1235 ProcessSP process_sp(m_process_wp.lock());
1236 if (process_sp) {
1237 data_sp = ReadMemory(process_sp, addr: m_memory_addr, byte_size: header_and_lc_size);
1238 } else {
1239 // Read in all only the load command data from the file on disk
1240 data_sp = MapFileData(file: m_file, Size: header_and_lc_size, Offset: m_file_offset);
1241 if (data_sp->GetByteSize() != header_and_lc_size)
1242 continue;
1243 }
1244 if (data_sp)
1245 m_data.SetData(data_sp);
1246 }
1247 }
1248 return true;
1249 }
1250 // None found.
1251 return false;
1252 } else {
1253 memset(s: &m_header, c: 0, n: sizeof(struct llvm::MachO::mach_header));
1254 }
1255 return false;
1256}
1257
1258ByteOrder ObjectFileMachO::GetByteOrder() const {
1259 return m_data.GetByteOrder();
1260}
1261
1262bool ObjectFileMachO::IsExecutable() const {
1263 return m_header.filetype == MH_EXECUTE;
1264}
1265
1266bool ObjectFileMachO::IsDynamicLoader() const {
1267 return m_header.filetype == MH_DYLINKER;
1268}
1269
1270bool ObjectFileMachO::IsSharedCacheBinary() const {
1271 return m_header.flags & MH_DYLIB_IN_CACHE;
1272}
1273
1274bool ObjectFileMachO::IsKext() const {
1275 return m_header.filetype == MH_KEXT_BUNDLE;
1276}
1277
1278uint32_t ObjectFileMachO::GetAddressByteSize() const {
1279 return m_data.GetAddressByteSize();
1280}
1281
1282AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) {
1283 Symtab *symtab = GetSymtab();
1284 if (!symtab)
1285 return AddressClass::eUnknown;
1286
1287 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1288 if (symbol) {
1289 if (symbol->ValueIsAddress()) {
1290 SectionSP section_sp(symbol->GetAddressRef().GetSection());
1291 if (section_sp) {
1292 const lldb::SectionType section_type = section_sp->GetType();
1293 switch (section_type) {
1294 case eSectionTypeInvalid:
1295 return AddressClass::eUnknown;
1296
1297 case eSectionTypeCode:
1298 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1299 // For ARM we have a bit in the n_desc field of the symbol that
1300 // tells us ARM/Thumb which is bit 0x0008.
1301 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1302 return AddressClass::eCodeAlternateISA;
1303 }
1304 return AddressClass::eCode;
1305
1306 case eSectionTypeContainer:
1307 return AddressClass::eUnknown;
1308
1309 case eSectionTypeData:
1310 case eSectionTypeDataCString:
1311 case eSectionTypeDataCStringPointers:
1312 case eSectionTypeDataSymbolAddress:
1313 case eSectionTypeData4:
1314 case eSectionTypeData8:
1315 case eSectionTypeData16:
1316 case eSectionTypeDataPointers:
1317 case eSectionTypeZeroFill:
1318 case eSectionTypeDataObjCMessageRefs:
1319 case eSectionTypeDataObjCCFStrings:
1320 case eSectionTypeGoSymtab:
1321 return AddressClass::eData;
1322
1323 case eSectionTypeDebug:
1324 case eSectionTypeDWARFDebugAbbrev:
1325 case eSectionTypeDWARFDebugAbbrevDwo:
1326 case eSectionTypeDWARFDebugAddr:
1327 case eSectionTypeDWARFDebugAranges:
1328 case eSectionTypeDWARFDebugCuIndex:
1329 case eSectionTypeDWARFDebugFrame:
1330 case eSectionTypeDWARFDebugInfo:
1331 case eSectionTypeDWARFDebugInfoDwo:
1332 case eSectionTypeDWARFDebugLine:
1333 case eSectionTypeDWARFDebugLineStr:
1334 case eSectionTypeDWARFDebugLoc:
1335 case eSectionTypeDWARFDebugLocDwo:
1336 case eSectionTypeDWARFDebugLocLists:
1337 case eSectionTypeDWARFDebugLocListsDwo:
1338 case eSectionTypeDWARFDebugMacInfo:
1339 case eSectionTypeDWARFDebugMacro:
1340 case eSectionTypeDWARFDebugNames:
1341 case eSectionTypeDWARFDebugPubNames:
1342 case eSectionTypeDWARFDebugPubTypes:
1343 case eSectionTypeDWARFDebugRanges:
1344 case eSectionTypeDWARFDebugRngLists:
1345 case eSectionTypeDWARFDebugRngListsDwo:
1346 case eSectionTypeDWARFDebugStr:
1347 case eSectionTypeDWARFDebugStrDwo:
1348 case eSectionTypeDWARFDebugStrOffsets:
1349 case eSectionTypeDWARFDebugStrOffsetsDwo:
1350 case eSectionTypeDWARFDebugTuIndex:
1351 case eSectionTypeDWARFDebugTypes:
1352 case eSectionTypeDWARFDebugTypesDwo:
1353 case eSectionTypeDWARFAppleNames:
1354 case eSectionTypeDWARFAppleTypes:
1355 case eSectionTypeDWARFAppleNamespaces:
1356 case eSectionTypeDWARFAppleObjC:
1357 case eSectionTypeDWARFGNUDebugAltLink:
1358 case eSectionTypeCTF:
1359 case eSectionTypeLLDBTypeSummaries:
1360 case eSectionTypeLLDBFormatters:
1361 case eSectionTypeSwiftModules:
1362 return AddressClass::eDebug;
1363
1364 case eSectionTypeEHFrame:
1365 case eSectionTypeARMexidx:
1366 case eSectionTypeARMextab:
1367 case eSectionTypeCompactUnwind:
1368 return AddressClass::eRuntime;
1369
1370 case eSectionTypeAbsoluteAddress:
1371 case eSectionTypeELFSymbolTable:
1372 case eSectionTypeELFDynamicSymbols:
1373 case eSectionTypeELFRelocationEntries:
1374 case eSectionTypeELFDynamicLinkInfo:
1375 case eSectionTypeOther:
1376 return AddressClass::eUnknown;
1377 }
1378 }
1379 }
1380
1381 const SymbolType symbol_type = symbol->GetType();
1382 switch (symbol_type) {
1383 case eSymbolTypeAny:
1384 return AddressClass::eUnknown;
1385 case eSymbolTypeAbsolute:
1386 return AddressClass::eUnknown;
1387
1388 case eSymbolTypeCode:
1389 case eSymbolTypeTrampoline:
1390 case eSymbolTypeResolver:
1391 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1392 // For ARM we have a bit in the n_desc field of the symbol that tells
1393 // us ARM/Thumb which is bit 0x0008.
1394 if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1395 return AddressClass::eCodeAlternateISA;
1396 }
1397 return AddressClass::eCode;
1398
1399 case eSymbolTypeData:
1400 return AddressClass::eData;
1401 case eSymbolTypeRuntime:
1402 return AddressClass::eRuntime;
1403 case eSymbolTypeException:
1404 return AddressClass::eRuntime;
1405 case eSymbolTypeSourceFile:
1406 return AddressClass::eDebug;
1407 case eSymbolTypeHeaderFile:
1408 return AddressClass::eDebug;
1409 case eSymbolTypeObjectFile:
1410 return AddressClass::eDebug;
1411 case eSymbolTypeCommonBlock:
1412 return AddressClass::eDebug;
1413 case eSymbolTypeBlock:
1414 return AddressClass::eDebug;
1415 case eSymbolTypeLocal:
1416 return AddressClass::eData;
1417 case eSymbolTypeParam:
1418 return AddressClass::eData;
1419 case eSymbolTypeVariable:
1420 return AddressClass::eData;
1421 case eSymbolTypeVariableType:
1422 return AddressClass::eDebug;
1423 case eSymbolTypeLineEntry:
1424 return AddressClass::eDebug;
1425 case eSymbolTypeLineHeader:
1426 return AddressClass::eDebug;
1427 case eSymbolTypeScopeBegin:
1428 return AddressClass::eDebug;
1429 case eSymbolTypeScopeEnd:
1430 return AddressClass::eDebug;
1431 case eSymbolTypeAdditional:
1432 return AddressClass::eUnknown;
1433 case eSymbolTypeCompiler:
1434 return AddressClass::eDebug;
1435 case eSymbolTypeInstrumentation:
1436 return AddressClass::eDebug;
1437 case eSymbolTypeUndefined:
1438 return AddressClass::eUnknown;
1439 case eSymbolTypeObjCClass:
1440 return AddressClass::eRuntime;
1441 case eSymbolTypeObjCMetaClass:
1442 return AddressClass::eRuntime;
1443 case eSymbolTypeObjCIVar:
1444 return AddressClass::eRuntime;
1445 case eSymbolTypeReExported:
1446 return AddressClass::eRuntime;
1447 }
1448 }
1449 return AddressClass::eUnknown;
1450}
1451
1452bool ObjectFileMachO::IsStripped() {
1453 if (m_dysymtab.cmd == 0) {
1454 ModuleSP module_sp(GetModule());
1455 if (module_sp) {
1456 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
1457 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1458 const lldb::offset_t load_cmd_offset = offset;
1459
1460 llvm::MachO::load_command lc = {};
1461 if (m_data.GetU32(offset_ptr: &offset, dst: &lc.cmd, count: 2) == nullptr)
1462 break;
1463 if (lc.cmd == LC_DYSYMTAB) {
1464 m_dysymtab.cmd = lc.cmd;
1465 m_dysymtab.cmdsize = lc.cmdsize;
1466 if (m_data.GetU32(offset_ptr: &offset, dst: &m_dysymtab.ilocalsym,
1467 count: (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1468 nullptr) {
1469 // Clear m_dysymtab if we were unable to read all items from the
1470 // load command
1471 ::memset(s: &m_dysymtab, c: 0, n: sizeof(m_dysymtab));
1472 }
1473 }
1474 offset = load_cmd_offset + lc.cmdsize;
1475 }
1476 }
1477 }
1478 if (m_dysymtab.cmd)
1479 return m_dysymtab.nlocalsym <= 1;
1480 return false;
1481}
1482
1483ObjectFileMachO::EncryptedFileRanges ObjectFileMachO::GetEncryptedFileRanges() {
1484 EncryptedFileRanges result;
1485 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
1486
1487 llvm::MachO::encryption_info_command encryption_cmd;
1488 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1489 const lldb::offset_t load_cmd_offset = offset;
1490 if (m_data.GetU32(offset_ptr: &offset, dst: &encryption_cmd, count: 2) == nullptr)
1491 break;
1492
1493 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1494 // 3 fields we care about, so treat them the same.
1495 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1496 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
1497 if (m_data.GetU32(offset_ptr: &offset, dst: &encryption_cmd.cryptoff, count: 3)) {
1498 if (encryption_cmd.cryptid != 0) {
1499 EncryptedFileRanges::Entry entry;
1500 entry.SetRangeBase(encryption_cmd.cryptoff);
1501 entry.SetByteSize(encryption_cmd.cryptsize);
1502 result.Append(entry);
1503 }
1504 }
1505 }
1506 offset = load_cmd_offset + encryption_cmd.cmdsize;
1507 }
1508
1509 return result;
1510}
1511
1512void ObjectFileMachO::SanitizeSegmentCommand(
1513 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
1514 if (m_length == 0 || seg_cmd.filesize == 0)
1515 return;
1516
1517 if (IsSharedCacheBinary() && !IsInMemory()) {
1518 // In shared cache images, the load commands are relative to the
1519 // shared cache file, and not the specific image we are
1520 // examining. Let's fix this up so that it looks like a normal
1521 // image.
1522 if (strncmp(s1: seg_cmd.segname, s2: GetSegmentNameTEXT().GetCString(),
1523 n: sizeof(seg_cmd.segname)) == 0)
1524 m_text_address = seg_cmd.vmaddr;
1525 if (strncmp(s1: seg_cmd.segname, s2: GetSegmentNameLINKEDIT().GetCString(),
1526 n: sizeof(seg_cmd.segname)) == 0)
1527 m_linkedit_original_offset = seg_cmd.fileoff;
1528
1529 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1530 }
1531
1532 if (seg_cmd.fileoff > m_length) {
1533 // We have a load command that says it extends past the end of the file.
1534 // This is likely a corrupt file. We don't have any way to return an error
1535 // condition here (this method was likely invoked from something like
1536 // ObjectFile::GetSectionList()), so we just null out the section contents,
1537 // and dump a message to stdout. The most common case here is core file
1538 // debugging with a truncated file.
1539 const char *lc_segment_name =
1540 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1541 GetModule()->ReportWarning(
1542 format: "load command {0} {1} has a fileoff ({2:x16}) that extends beyond "
1543 "the end of the file ({3:x16}), ignoring this section",
1544 args&: cmd_idx, args&: lc_segment_name, args&: seg_cmd.fileoff, args&: m_length);
1545
1546 seg_cmd.fileoff = 0;
1547 seg_cmd.filesize = 0;
1548 }
1549
1550 if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1551 // We have a load command that says it extends past the end of the file.
1552 // This is likely a corrupt file. We don't have any way to return an error
1553 // condition here (this method was likely invoked from something like
1554 // ObjectFile::GetSectionList()), so we just null out the section contents,
1555 // and dump a message to stdout. The most common case here is core file
1556 // debugging with a truncated file.
1557 const char *lc_segment_name =
1558 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1559 GetModule()->ReportWarning(
1560 format: "load command {0} {1} has a fileoff + filesize ({2:x16}) that "
1561 "extends beyond the end of the file ({3:x16}), the segment will be "
1562 "truncated to match",
1563 args&: cmd_idx, args&: lc_segment_name, args: seg_cmd.fileoff + seg_cmd.filesize, args&: m_length);
1564
1565 // Truncate the length
1566 seg_cmd.filesize = m_length - seg_cmd.fileoff;
1567 }
1568}
1569
1570static uint32_t
1571GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1572 uint32_t result = 0;
1573 if (seg_cmd.initprot & VM_PROT_READ)
1574 result |= ePermissionsReadable;
1575 if (seg_cmd.initprot & VM_PROT_WRITE)
1576 result |= ePermissionsWritable;
1577 if (seg_cmd.initprot & VM_PROT_EXECUTE)
1578 result |= ePermissionsExecutable;
1579 return result;
1580}
1581
1582static lldb::SectionType GetSectionType(uint32_t flags,
1583 ConstString section_name) {
1584
1585 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1586 return eSectionTypeCode;
1587
1588 uint32_t mach_sect_type = flags & SECTION_TYPE;
1589 static ConstString g_sect_name_objc_data("__objc_data");
1590 static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1591 static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1592 static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1593 static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1594 static ConstString g_sect_name_objc_const("__objc_const");
1595 static ConstString g_sect_name_objc_classlist("__objc_classlist");
1596 static ConstString g_sect_name_cfstring("__cfstring");
1597
1598 static ConstString g_sect_name_dwarf_debug_str_offs("__debug_str_offs");
1599 static ConstString g_sect_name_dwarf_debug_str_offs_dwo("__debug_str_offs.dwo");
1600 static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1601 static ConstString g_sect_name_dwarf_apple_types("__apple_types");
1602 static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1603 static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1604 static ConstString g_sect_name_eh_frame("__eh_frame");
1605 static ConstString g_sect_name_compact_unwind("__unwind_info");
1606 static ConstString g_sect_name_text("__text");
1607 static ConstString g_sect_name_data("__data");
1608 static ConstString g_sect_name_go_symtab("__gosymtab");
1609 static ConstString g_sect_name_ctf("__ctf");
1610 static ConstString g_sect_name_lldb_summaries("__lldbsummaries");
1611 static ConstString g_sect_name_lldb_formatters("__lldbformatters");
1612 static ConstString g_sect_name_swift_ast("__swift_ast");
1613
1614 if (section_name == g_sect_name_dwarf_debug_str_offs)
1615 return eSectionTypeDWARFDebugStrOffsets;
1616 if (section_name == g_sect_name_dwarf_debug_str_offs_dwo)
1617 return eSectionTypeDWARFDebugStrOffsetsDwo;
1618
1619 llvm::StringRef stripped_name = section_name.GetStringRef();
1620 if (stripped_name.consume_front(Prefix: "__debug_"))
1621 return ObjectFile::GetDWARFSectionTypeFromName(name: stripped_name);
1622
1623 if (section_name == g_sect_name_dwarf_apple_names)
1624 return eSectionTypeDWARFAppleNames;
1625 if (section_name == g_sect_name_dwarf_apple_types)
1626 return eSectionTypeDWARFAppleTypes;
1627 if (section_name == g_sect_name_dwarf_apple_namespaces)
1628 return eSectionTypeDWARFAppleNamespaces;
1629 if (section_name == g_sect_name_dwarf_apple_objc)
1630 return eSectionTypeDWARFAppleObjC;
1631 if (section_name == g_sect_name_objc_selrefs)
1632 return eSectionTypeDataCStringPointers;
1633 if (section_name == g_sect_name_objc_msgrefs)
1634 return eSectionTypeDataObjCMessageRefs;
1635 if (section_name == g_sect_name_eh_frame)
1636 return eSectionTypeEHFrame;
1637 if (section_name == g_sect_name_compact_unwind)
1638 return eSectionTypeCompactUnwind;
1639 if (section_name == g_sect_name_cfstring)
1640 return eSectionTypeDataObjCCFStrings;
1641 if (section_name == g_sect_name_go_symtab)
1642 return eSectionTypeGoSymtab;
1643 if (section_name == g_sect_name_ctf)
1644 return eSectionTypeCTF;
1645 if (section_name == g_sect_name_lldb_summaries)
1646 return lldb::eSectionTypeLLDBTypeSummaries;
1647 if (section_name == g_sect_name_lldb_formatters)
1648 return lldb::eSectionTypeLLDBFormatters;
1649 if (section_name == g_sect_name_swift_ast)
1650 return eSectionTypeSwiftModules;
1651 if (section_name == g_sect_name_objc_data ||
1652 section_name == g_sect_name_objc_classrefs ||
1653 section_name == g_sect_name_objc_superrefs ||
1654 section_name == g_sect_name_objc_const ||
1655 section_name == g_sect_name_objc_classlist) {
1656 return eSectionTypeDataPointers;
1657 }
1658
1659 switch (mach_sect_type) {
1660 // TODO: categorize sections by other flags for regular sections
1661 case S_REGULAR:
1662 if (section_name == g_sect_name_text)
1663 return eSectionTypeCode;
1664 if (section_name == g_sect_name_data)
1665 return eSectionTypeData;
1666 return eSectionTypeOther;
1667 case S_ZEROFILL:
1668 return eSectionTypeZeroFill;
1669 case S_CSTRING_LITERALS: // section with only literal C strings
1670 return eSectionTypeDataCString;
1671 case S_4BYTE_LITERALS: // section with only 4 byte literals
1672 return eSectionTypeData4;
1673 case S_8BYTE_LITERALS: // section with only 8 byte literals
1674 return eSectionTypeData8;
1675 case S_LITERAL_POINTERS: // section with only pointers to literals
1676 return eSectionTypeDataPointers;
1677 case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1678 return eSectionTypeDataPointers;
1679 case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1680 return eSectionTypeDataPointers;
1681 case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1682 // the reserved2 field
1683 return eSectionTypeCode;
1684 case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1685 // initialization
1686 return eSectionTypeDataPointers;
1687 case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1688 // termination
1689 return eSectionTypeDataPointers;
1690 case S_COALESCED:
1691 return eSectionTypeOther;
1692 case S_GB_ZEROFILL:
1693 return eSectionTypeZeroFill;
1694 case S_INTERPOSING: // section with only pairs of function pointers for
1695 // interposing
1696 return eSectionTypeCode;
1697 case S_16BYTE_LITERALS: // section with only 16 byte literals
1698 return eSectionTypeData16;
1699 case S_DTRACE_DOF:
1700 return eSectionTypeDebug;
1701 case S_LAZY_DYLIB_SYMBOL_POINTERS:
1702 return eSectionTypeDataPointers;
1703 default:
1704 return eSectionTypeOther;
1705 }
1706}
1707
1708struct ObjectFileMachO::SegmentParsingContext {
1709 const EncryptedFileRanges EncryptedRanges;
1710 lldb_private::SectionList &UnifiedList;
1711 uint32_t NextSegmentIdx = 0;
1712 uint32_t NextSectionIdx = 0;
1713 bool FileAddressesChanged = false;
1714
1715 SegmentParsingContext(EncryptedFileRanges EncryptedRanges,
1716 lldb_private::SectionList &UnifiedList)
1717 : EncryptedRanges(std::move(EncryptedRanges)), UnifiedList(UnifiedList) {}
1718};
1719
/// Decode one segment load command from m_data and create the Section objects
/// for the segment and for each section header that follows it.
///
/// The segment becomes a container Section in m_sections_up (and, unless it
/// must stay private, in context.UnifiedList); every section header is also
/// appended to m_mach_sections. When this object file is a dSYM and the
/// unified list already has a section with the segment's name, that section
/// is reused instead of being re-created, except for the __LINKEDIT segment
/// which is created again but kept out of the unified list.
///
/// \param[in] load_cmd_
///     Generic load command header; its bytes are copied into the front of a
///     local segment_command_64 before the rest is decoded.
/// \param[in] offset
///     Offset into m_data where decoding continues, beginning with the
///     16-byte segment name.
/// \param[in] cmd_idx
///     Index of this load command, forwarded to SanitizeSegmentCommand().
/// \param[in,out] context
///     Running segment/section counters, the unified section list, the
///     encrypted ranges, and the "file addresses changed" flag.
1720void ObjectFileMachO::ProcessSegmentCommand(
1721 const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1722 uint32_t cmd_idx, SegmentParsingContext &context) {
1723 llvm::MachO::segment_command_64 load_cmd;
// Seed the local command with the header supplied by the caller; the
// remaining fields are filled in from m_data below.
1724 memcpy(dest: &load_cmd, src: &load_cmd_, n: sizeof(load_cmd_));
1725
1726 if (!m_data.GetU8(offset_ptr: &offset, dst: (uint8_t *)load_cmd.segname, count: 16))
1727 return;
1728
1729 ModuleSP module_sp = GetModule();
1730 const bool is_core = GetType() == eTypeCoreFile;
1731 const bool is_dsym = (m_header.filetype == MH_DSYM);
1732 bool add_section = true;
1733 bool add_to_unified = true;
1734 ConstString const_segname(
1735 load_cmd.segname, strnlen(string: load_cmd.segname, maxlen: sizeof(load_cmd.segname)));
1736
1737 SectionSP unified_section_sp(
1738 context.UnifiedList.FindSectionByName(section_dstr: const_segname));
1739 if (is_dsym && unified_section_sp) {
1740 if (const_segname == GetSegmentNameLINKEDIT()) {
1741 // We need to keep the __LINKEDIT segment private to this object file
1742 // only
1743 add_to_unified = false;
1744 } else {
1745 // This is the dSYM file and this section has already been created by the
1746 // object file, no need to create it.
1747 add_section = false;
1748 }
1749 }
1750 load_cmd.vmaddr = m_data.GetAddress(offset_ptr: &offset);
1751 load_cmd.vmsize = m_data.GetAddress(offset_ptr: &offset);
1752 load_cmd.fileoff = m_data.GetAddress(offset_ptr: &offset);
1753 load_cmd.filesize = m_data.GetAddress(offset_ptr: &offset);
// Reads four consecutive 32-bit fields starting at maxprot. nsects and flags,
// which are used below, are presumably among them -- confirm against the
// segment_command_64 layout.
1754 if (!m_data.GetU32(offset_ptr: &offset, dst: &load_cmd.maxprot, count: 4))
1755 return;
1756
1757 SanitizeSegmentCommand(seg_cmd&: load_cmd, cmd_idx);
1758
1759 const uint32_t segment_permissions = GetSegmentPermissions(seg_cmd: load_cmd);
1760 const bool segment_is_encrypted =
1761 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1762
1763 // Use a segment ID of the segment index shifted left by 8 so they never
1764 // conflict with any of the sections.
1765 SectionSP segment_sp;
1766 if (add_section && (const_segname || is_core)) {
1767 segment_sp = std::make_shared<Section>(
1768 args&: module_sp, // Module to which this section belongs
1769 args: this, // Object file to which this sections belongs
1770 args: ++context.NextSegmentIdx
1771 << 8, // Section ID is the 1 based segment index
1772 // shifted left by 8 bits as not to collide with any of the 256
1773 // section IDs that are possible
1774 args&: const_segname, // Name of this section
1775 args: eSectionTypeContainer, // This section is a container of other
1776 // sections.
1777 args&: load_cmd.vmaddr, // File VM address == addresses as they are
1778 // found in the object file
1779 args&: load_cmd.vmsize, // VM size in bytes of this section
1780 args&: load_cmd.fileoff, // Offset to the data for this section in
1781 // the file
1782 args&: load_cmd.filesize, // Size in bytes of this section as found
1783 // in the file
1784 args: 0, // Segments have no alignment information
1785 args&: load_cmd.flags); // Flags for this section
1786
1787 segment_sp->SetIsEncrypted(segment_is_encrypted);
1788 m_sections_up->AddSection(section_sp: segment_sp);
1789 segment_sp->SetPermissions(segment_permissions);
1790 if (add_to_unified)
1791 context.UnifiedList.AddSection(section_sp: segment_sp);
1792 } else if (unified_section_sp) {
1793 // If this is a dSYM and the file addresses in the dSYM differ from the
1794 // file addresses in the ObjectFile, we must use the file base address for
1795 // the Section from the dSYM for the DWARF to resolve correctly.
1796 // This only happens with binaries in the shared cache in practice;
1797 // normally a mismatch like this would give a binary & dSYM that do not
1798 // match UUIDs. When a binary is included in the shared cache, its
1799 // segments are rearranged to optimize the shared cache, so its file
1800 // addresses will differ from what the ObjectFile had originally,
1801 // and what the dSYM has.
1802 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1803 Log *log = GetLog(mask: LLDBLog::Symbols);
1804 if (log) {
1805 log->Printf(
1806 format: "Installing dSYM's %s segment file address over ObjectFile's "
1807 "so symbol table/debug info resolves correctly for %s",
1808 const_segname.AsCString(),
1809 module_sp->GetFileSpec().GetFilename().AsCString());
1810 }
1811
1812 // Make sure we've parsed the symbol table from the ObjectFile before
1813 // we go around changing its Sections.
1814 module_sp->GetObjectFile()->GetSymtab();
1815 // eh_frame would present the same problems but we parse that on a per-
1816 // function basis as-needed so it's more difficult to remove its use of
1817 // the Sections. Realistically, the environments where this code path
1818 // will be taken will not have eh_frame sections.
1819
1820 unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1821
1822 // Notify the module that the section addresses have been changed once
1823 // we're done so any file-address caches can be updated.
1824 context.FileAddressesChanged = true;
1825 }
1826 m_sections_up->AddSection(section_sp: unified_section_sp);
1827 }
1828
1829 llvm::MachO::section_64 sect64;
1830 ::memset(s: &sect64, c: 0, n: sizeof(sect64));
1831 // Push a section into our mach sections for the section at index zero
1832 // (NO_SECT) if we don't have any mach sections yet...
1833 if (m_mach_sections.empty())
1834 m_mach_sections.push_back(x: sect64);
1835 uint32_t segment_sect_idx;
// ID that the first section created by this command will receive; used by
// the dSYM size fix-up at the end of the function.
1836 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1837
// Number of 32-bit fields read per section header, starting at sect64.offset:
// 7 for LC_SEGMENT, 8 for any other command value passed in.
1838 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1839 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1840 ++segment_sect_idx) {
1841 if (m_data.GetU8(offset_ptr: &offset, dst: (uint8_t *)sect64.sectname,
1842 count: sizeof(sect64.sectname)) == nullptr)
1843 break;
1844 if (m_data.GetU8(offset_ptr: &offset, dst: (uint8_t *)sect64.segname,
1845 count: sizeof(sect64.segname)) == nullptr)
1846 break;
1847 sect64.addr = m_data.GetAddress(offset_ptr: &offset);
1848 sect64.size = m_data.GetAddress(offset_ptr: &offset);
1849
1850 if (m_data.GetU32(offset_ptr: &offset, dst: &sect64.offset, count: num_u32s) == nullptr)
1851 break;
1852
// For a shared cache binary that is not in memory, replace the section's file
// offset with its address relative to m_text_address.
1853 if (IsSharedCacheBinary() && !IsInMemory()) {
1854 sect64.offset = sect64.addr - m_text_address;
1855 }
1856
1857 // Keep a list of mach sections around in case we need to get at data that
1858 // isn't stored in the abstracted Sections.
1859 m_mach_sections.push_back(x: sect64);
1860
1861 if (add_section) {
1862 ConstString section_name(
1863 sect64.sectname, strnlen(string: sect64.sectname, maxlen: sizeof(sect64.sectname)));
1864 if (!const_segname) {
1865 // We have a segment with no name so we need to conjure up segments
1866 // that correspond to the section's segname if there isn't already such
1867 // a section. If there is such a section, we resize the section so that
1868 // it spans all sections. We also mark these sections as fake so
1869 // address matches don't hit if they land in the gaps between the child
1870 // sections.
1871 const_segname.SetTrimmedCStringWithLength(cstr: sect64.segname,
1872 fixed_cstr_len: sizeof(sect64.segname));
1873 segment_sp = context.UnifiedList.FindSectionByName(section_dstr: const_segname);
1874 if (segment_sp.get()) {
1875 Section *segment = segment_sp.get();
1876 // Grow the section size as needed.
1877 const lldb::addr_t sect64_min_addr = sect64.addr;
1878 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1879 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1880 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1881 const lldb::addr_t curr_seg_max_addr =
1882 curr_seg_min_addr + curr_seg_byte_size;
1883 if (sect64_min_addr >= curr_seg_min_addr) {
1884 const lldb::addr_t new_seg_byte_size =
1885 sect64_max_addr - curr_seg_min_addr;
1886 // Only grow the section size if needed
1887 if (new_seg_byte_size > curr_seg_byte_size)
1888 segment->SetByteSize(new_seg_byte_size);
1889 } else {
1890 // We need to change the base address of the segment and adjust the
1891 // child section offsets for all existing children.
1892 const lldb::addr_t slide_amount =
1893 sect64_min_addr - curr_seg_min_addr;
1894 segment->Slide(slide_amount, slide_children: false);
1895 segment->GetChildren().Slide(slide_amount: -slide_amount, slide_children: false);
1896 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1897 }
1898
1899 // Grow the segment's file range as needed so it also covers this
// section's file data (skipped when the section has no file offset).
1900 if (sect64.offset) {
1901 const lldb::addr_t segment_min_file_offset =
1902 segment->GetFileOffset();
1903 const lldb::addr_t segment_max_file_offset =
1904 segment_min_file_offset + segment->GetFileSize();
1905
1906 const lldb::addr_t section_min_file_offset = sect64.offset;
1907 const lldb::addr_t section_max_file_offset =
1908 section_min_file_offset + sect64.size;
1909 const lldb::addr_t new_file_offset =
1910 std::min(a: section_min_file_offset, b: segment_min_file_offset);
1911 const lldb::addr_t new_file_size =
1912 std::max(a: section_max_file_offset, b: segment_max_file_offset) -
1913 new_file_offset;
1914 segment->SetFileOffset(new_file_offset);
1915 segment->SetFileSize(new_file_size);
1916 }
1917 } else {
1918 // Create a fake section for the section's named segment
1919 segment_sp = std::make_shared<Section>(
1920 args&: segment_sp, // Parent section
1921 args&: module_sp, // Module to which this section belongs
1922 args: this, // Object file to which this section belongs
1923 args: ++context.NextSegmentIdx
1924 << 8, // Section ID is the 1 based segment index
1925 // shifted left by 8 bits as not to
1926 // collide with any of the 256 section IDs
1927 // that are possible
1928 args&: const_segname, // Name of this section
1929 args: eSectionTypeContainer, // This section is a container of
1930 // other sections.
1931 args&: sect64.addr, // File VM address == addresses as they are
1932 // found in the object file
1933 args&: sect64.size, // VM size in bytes of this section
1934 args&: sect64.offset, // Offset to the data for this section in
1935 // the file
1936 args: sect64.offset ? sect64.size : 0, // Size in bytes of
1937 // this section as
1938 // found in the file
1939 args&: sect64.align,
1940 args&: load_cmd.flags); // Flags for this section
1941 segment_sp->SetIsFake(true);
1942 segment_sp->SetPermissions(segment_permissions);
1943 m_sections_up->AddSection(section_sp: segment_sp);
1944 if (add_to_unified)
1945 context.UnifiedList.AddSection(section_sp: segment_sp);
1946 segment_sp->SetIsEncrypted(segment_is_encrypted);
1947 }
1948 }
1949 assert(segment_sp.get());
1950
1951 lldb::SectionType sect_type = GetSectionType(flags: sect64.flags, section_name);
1952
// The child section's file address is stored relative to its parent segment.
1953 SectionSP section_sp(new Section(
1954 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1955 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1956 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1957 sect64.flags));
1958 // Mark the section encrypted when its segment is, or when its file
// offset falls inside one of the known encrypted ranges.
1959
1960 bool section_is_encrypted = false;
1961 if (!segment_is_encrypted && load_cmd.filesize != 0)
1962 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1963 addr: sect64.offset) != nullptr;
1964
1965 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1966 section_sp->SetPermissions(segment_permissions);
1967 segment_sp->GetChildren().AddSection(section_sp);
1968
// A fake segment only applies to the section just added: forget it so the
// next section header derives its segment from its own segname again.
1969 if (segment_sp->IsFake()) {
1970 segment_sp.reset();
1971 const_segname.Clear();
1972 }
1973 }
1974 }
// For dSYM files, give every zero-sized section created by this command a
// size: the distance to the next section when there is one, otherwise the
// segment's vmsize.
1975 if (segment_sp && is_dsym) {
1976 if (first_segment_sectID <= context.NextSectionIdx) {
1977 lldb::user_id_t sect_uid;
1978 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1979 ++sect_uid) {
1980 SectionSP curr_section_sp(
1981 segment_sp->GetChildren().FindSectionByID(sect_id: sect_uid));
1982 SectionSP next_section_sp;
1983 if (sect_uid + 1 <= context.NextSectionIdx)
1984 next_section_sp =
1985 segment_sp->GetChildren().FindSectionByID(sect_id: sect_uid + 1);
1986
1987 if (curr_section_sp.get()) {
1988 if (curr_section_sp->GetByteSize() == 0) {
1989 if (next_section_sp.get() != nullptr)
1990 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1991 curr_section_sp->GetFileAddress());
1992 else
1993 curr_section_sp->SetByteSize(load_cmd.vmsize);
1994 }
1995 }
1996 }
1997 }
1998 }
1999}
2000
2001void ObjectFileMachO::ProcessDysymtabCommand(
2002 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
2003 m_dysymtab.cmd = load_cmd.cmd;
2004 m_dysymtab.cmdsize = load_cmd.cmdsize;
2005 m_data.GetU32(offset_ptr: &offset, dst: &m_dysymtab.ilocalsym,
2006 count: (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
2007}
2008
2009void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
2010 if (m_sections_up)
2011 return;
2012
2013 m_sections_up = std::make_unique<SectionList>();
2014
2015 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
2016 // bool dump_sections = false;
2017 ModuleSP module_sp(GetModule());
2018
2019 offset = MachHeaderSizeFromMagic(magic: m_header.magic);
2020
2021 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
2022 llvm::MachO::load_command load_cmd;
2023 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
2024 const lldb::offset_t load_cmd_offset = offset;
2025 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
2026 break;
2027
2028 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
2029 ProcessSegmentCommand(load_cmd_: load_cmd, offset, cmd_idx: i, context);
2030 else if (load_cmd.cmd == LC_DYSYMTAB)
2031 ProcessDysymtabCommand(load_cmd, offset);
2032
2033 offset = load_cmd_offset + load_cmd.cmdsize;
2034 }
2035
2036 if (context.FileAddressesChanged && module_sp)
2037 module_sp->SectionFileAddressesChanged();
2038}
2039
2040class MachSymtabSectionInfo {
2041public:
2042 MachSymtabSectionInfo(SectionList *section_list)
2043 : m_section_list(section_list), m_section_infos() {
2044 // Get the number of sections down to a depth of 1 to include all segments
2045 // and their sections, but no other sections that may be added for debug
2046 // map or
2047 m_section_infos.resize(new_size: section_list->GetNumSections(depth: 1));
2048 }
2049
2050 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
2051 if (n_sect == 0)
2052 return SectionSP();
2053 if (n_sect < m_section_infos.size()) {
2054 if (!m_section_infos[n_sect].section_sp) {
2055 SectionSP section_sp(m_section_list->FindSectionByID(sect_id: n_sect));
2056 m_section_infos[n_sect].section_sp = section_sp;
2057 if (section_sp) {
2058 m_section_infos[n_sect].vm_range.SetBaseAddress(
2059 section_sp->GetFileAddress());
2060 m_section_infos[n_sect].vm_range.SetByteSize(
2061 section_sp->GetByteSize());
2062 } else {
2063 std::string filename = "<unknown>";
2064 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(idx: 0));
2065 if (first_section_sp)
2066 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
2067
2068 Debugger::ReportError(
2069 message: llvm::formatv(Fmt: "unable to find section {0} for a symbol in "
2070 "{1}, corrupt file?",
2071 Vals&: n_sect, Vals&: filename));
2072 }
2073 }
2074 if (m_section_infos[n_sect].vm_range.Contains(addr: file_addr)) {
2075 // Symbol is in section.
2076 return m_section_infos[n_sect].section_sp;
2077 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
2078 m_section_infos[n_sect].vm_range.GetBaseAddress() ==
2079 file_addr) {
2080 // Symbol is in section with zero size, but has the same start address
2081 // as the section. This can happen with linker symbols (symbols that
2082 // start with the letter 'l' or 'L'.
2083 return m_section_infos[n_sect].section_sp;
2084 }
2085 }
2086 return m_section_list->FindSectionContainingFileAddress(addr: file_addr);
2087 }
2088
2089protected:
2090 struct SectionInfo {
2091 SectionInfo() : vm_range(), section_sp() {}
2092
2093 VMRange vm_range;
2094 SectionSP section_sp;
2095 };
2096 SectionList *m_section_list;
2097 std::vector<SectionInfo> m_section_infos;
2098};
2099
2100#define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
2101struct TrieEntry {
2102 void Dump() const {
2103 printf(format: "0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
2104 static_cast<unsigned long long>(address),
2105 static_cast<unsigned long long>(flags),
2106 static_cast<unsigned long long>(other), name.GetCString());
2107 if (import_name)
2108 printf(format: " -> \"%s\"\n", import_name.GetCString());
2109 else
2110 printf(format: "\n");
2111 }
2112 ConstString name;
2113 uint64_t address = LLDB_INVALID_ADDRESS;
2114 uint64_t flags =
2115 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
2116 // TRIE_SYMBOL_IS_THUMB
2117 uint64_t other = 0;
2118 ConstString import_name;
2119};
2120
2121struct TrieEntryWithOffset {
2122 lldb::offset_t nodeOffset;
2123 TrieEntry entry;
2124
2125 TrieEntryWithOffset(lldb::offset_t offset) : nodeOffset(offset), entry() {}
2126
2127 void Dump(uint32_t idx) const {
2128 printf(format: "[%3u] 0x%16.16llx: ", idx,
2129 static_cast<unsigned long long>(nodeOffset));
2130 entry.Dump();
2131 }
2132
2133 bool operator<(const TrieEntryWithOffset &other) const {
2134 return (nodeOffset < other.nodeOffset);
2135 }
2136};
2137
2138static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset,
2139 const bool is_arm, addr_t text_seg_base_addr,
2140 std::vector<llvm::StringRef> &nameSlices,
2141 std::set<lldb::addr_t> &resolver_addresses,
2142 std::vector<TrieEntryWithOffset> &reexports,
2143 std::vector<TrieEntryWithOffset> &ext_symbols) {
2144 if (!data.ValidOffset(offset))
2145 return true;
2146
2147 // Terminal node -- end of a branch, possibly add this to
2148 // the symbol table or resolver table.
2149 const uint64_t terminalSize = data.GetULEB128(offset_ptr: &offset);
2150 lldb::offset_t children_offset = offset + terminalSize;
2151 if (terminalSize != 0) {
2152 TrieEntryWithOffset e(offset);
2153 e.entry.flags = data.GetULEB128(offset_ptr: &offset);
2154 const char *import_name = nullptr;
2155 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
2156 e.entry.address = 0;
2157 e.entry.other = data.GetULEB128(offset_ptr: &offset); // dylib ordinal
2158 import_name = data.GetCStr(offset_ptr: &offset);
2159 } else {
2160 e.entry.address = data.GetULEB128(offset_ptr: &offset);
2161 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2162 e.entry.address += text_seg_base_addr;
2163 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
2164 e.entry.other = data.GetULEB128(offset_ptr: &offset);
2165 uint64_t resolver_addr = e.entry.other;
2166 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2167 resolver_addr += text_seg_base_addr;
2168 if (is_arm)
2169 resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2170 resolver_addresses.insert(x: resolver_addr);
2171 } else
2172 e.entry.other = 0;
2173 }
2174 bool add_this_entry = false;
2175 if (Flags(e.entry.flags).Test(bit: EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2176 import_name && import_name[0]) {
2177 // add symbols that are reexport symbols with a valid import name.
2178 add_this_entry = true;
2179 } else if (e.entry.flags == 0 &&
2180 (import_name == nullptr || import_name[0] == '\0')) {
2181 // add externally visible symbols, in case the nlist record has
2182 // been stripped/omitted.
2183 add_this_entry = true;
2184 }
2185 if (add_this_entry) {
2186 std::string name;
2187 if (!nameSlices.empty()) {
2188 for (auto name_slice : nameSlices)
2189 name.append(s: name_slice.data(), n: name_slice.size());
2190 }
2191 if (name.size() > 1) {
2192 // Skip the leading '_'
2193 e.entry.name.SetCStringWithLength(cstr: name.c_str() + 1, cstr_len: name.size() - 1);
2194 }
2195 if (import_name) {
2196 // Skip the leading '_'
2197 e.entry.import_name.SetCString(import_name + 1);
2198 }
2199 if (Flags(e.entry.flags).Test(bit: EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2200 reexports.push_back(x: e);
2201 } else {
2202 if (is_arm && (e.entry.address & 1)) {
2203 e.entry.flags |= TRIE_SYMBOL_IS_THUMB;
2204 e.entry.address &= THUMB_ADDRESS_BIT_MASK;
2205 }
2206 ext_symbols.push_back(x: e);
2207 }
2208 }
2209 }
2210
2211 const uint8_t childrenCount = data.GetU8(offset_ptr: &children_offset);
2212 for (uint8_t i = 0; i < childrenCount; ++i) {
2213 const char *cstr = data.GetCStr(offset_ptr: &children_offset);
2214 if (cstr)
2215 nameSlices.push_back(x: llvm::StringRef(cstr));
2216 else
2217 return false; // Corrupt data
2218 lldb::offset_t childNodeOffset = data.GetULEB128(offset_ptr: &children_offset);
2219 if (childNodeOffset) {
2220 if (!ParseTrieEntries(data, offset: childNodeOffset, is_arm, text_seg_base_addr,
2221 nameSlices, resolver_addresses, reexports,
2222 ext_symbols)) {
2223 return false;
2224 }
2225 }
2226 nameSlices.pop_back();
2227 }
2228 return true;
2229}
2230
2231static SymbolType GetSymbolType(const char *&symbol_name,
2232 bool &demangled_is_synthesized,
2233 const SectionSP &text_section_sp,
2234 const SectionSP &data_section_sp,
2235 const SectionSP &data_dirty_section_sp,
2236 const SectionSP &data_const_section_sp,
2237 const SectionSP &symbol_section) {
2238 SymbolType type = eSymbolTypeInvalid;
2239
2240 const char *symbol_sect_name = symbol_section->GetName().AsCString();
2241 if (symbol_section->IsDescendant(section: text_section_sp.get())) {
2242 if (symbol_section->IsClear(bit: S_ATTR_PURE_INSTRUCTIONS |
2243 S_ATTR_SELF_MODIFYING_CODE |
2244 S_ATTR_SOME_INSTRUCTIONS))
2245 type = eSymbolTypeData;
2246 else
2247 type = eSymbolTypeCode;
2248 } else if (symbol_section->IsDescendant(section: data_section_sp.get()) ||
2249 symbol_section->IsDescendant(section: data_dirty_section_sp.get()) ||
2250 symbol_section->IsDescendant(section: data_const_section_sp.get())) {
2251 if (symbol_sect_name &&
2252 ::strstr(haystack: symbol_sect_name, needle: "__objc") == symbol_sect_name) {
2253 type = eSymbolTypeRuntime;
2254
2255 if (symbol_name) {
2256 llvm::StringRef symbol_name_ref(symbol_name);
2257 if (symbol_name_ref.starts_with(Prefix: "OBJC_")) {
2258 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2259 static const llvm::StringRef g_objc_v2_prefix_metaclass(
2260 "OBJC_METACLASS_$_");
2261 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2262 if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_class)) {
2263 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2264 type = eSymbolTypeObjCClass;
2265 demangled_is_synthesized = true;
2266 } else if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_metaclass)) {
2267 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2268 type = eSymbolTypeObjCMetaClass;
2269 demangled_is_synthesized = true;
2270 } else if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_ivar)) {
2271 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2272 type = eSymbolTypeObjCIVar;
2273 demangled_is_synthesized = true;
2274 }
2275 }
2276 }
2277 } else if (symbol_sect_name &&
2278 ::strstr(haystack: symbol_sect_name, needle: "__gcc_except_tab") ==
2279 symbol_sect_name) {
2280 type = eSymbolTypeException;
2281 } else {
2282 type = eSymbolTypeData;
2283 }
2284 } else if (symbol_sect_name &&
2285 ::strstr(haystack: symbol_sect_name, needle: "__IMPORT") == symbol_sect_name) {
2286 type = eSymbolTypeTrampoline;
2287 }
2288 return type;
2289}
2290
2291static std::optional<struct nlist_64>
2292ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2293 size_t nlist_byte_size) {
2294 struct nlist_64 nlist;
2295 if (!nlist_data.ValidOffsetForDataOfSize(offset: nlist_data_offset, length: nlist_byte_size))
2296 return {};
2297 nlist.n_strx = nlist_data.GetU32_unchecked(offset_ptr: &nlist_data_offset);
2298 nlist.n_type = nlist_data.GetU8_unchecked(offset_ptr: &nlist_data_offset);
2299 nlist.n_sect = nlist_data.GetU8_unchecked(offset_ptr: &nlist_data_offset);
2300 nlist.n_desc = nlist_data.GetU16_unchecked(offset_ptr: &nlist_data_offset);
2301 nlist.n_value = nlist_data.GetAddress_unchecked(offset_ptr: &nlist_data_offset);
2302 return nlist;
2303}
2304
// Named stand-ins for true/false. Presumably passed where a flag selects
// debug versus non-debug symbols -- confirm at the call sites.
enum {
  NonDebugSymbols = false,
  DebugSymbols = true,
};
2306
2307void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
2308 ModuleSP module_sp(GetModule());
2309 if (!module_sp)
2310 return;
2311
2312 Log *log = GetLog(mask: LLDBLog::Symbols);
2313
2314 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec();
2315 const char *file_name = file.GetFilename().AsCString(value_if_empty: "<Unknown>");
2316 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name);
2317 LLDB_LOG(log, "Parsing symbol table for {0}", file_name);
2318 Progress progress("Parsing symbol table", file_name);
2319
2320 llvm::MachO::linkedit_data_command function_starts_load_command = {.cmd: 0, .cmdsize: 0, .dataoff: 0, .datasize: 0};
2321 llvm::MachO::linkedit_data_command exports_trie_load_command = {.cmd: 0, .cmdsize: 0, .dataoff: 0, .datasize: 0};
2322 llvm::MachO::dyld_info_command dyld_info = {.cmd: 0, .cmdsize: 0, .rebase_off: 0, .rebase_size: 0, .bind_off: 0, .bind_size: 0, .weak_bind_off: 0, .weak_bind_size: 0, .lazy_bind_off: 0, .lazy_bind_size: 0, .export_off: 0, .export_size: 0};
2323 llvm::MachO::dysymtab_command dysymtab = m_dysymtab;
2324 SymtabCommandLargeOffsets symtab_load_command;
2325 // The data element of type bool indicates that this entry is thumb
2326 // code.
2327 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2328
2329 // Record the address of every function/data that we add to the symtab.
2330 // We add symbols to the table in the order of most information (nlist
2331 // records) to least (function starts), and avoid duplicating symbols
2332 // via this set.
2333 llvm::DenseSet<addr_t> symbols_added;
2334
2335 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2336 // do not add the tombstone or empty keys to the set.
2337 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2338 // Don't add the tombstone or empty keys.
2339 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2340 return;
2341 symbols_added.insert(V: file_addr);
2342 };
2343 FunctionStarts function_starts;
2344 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
2345 uint32_t i;
2346 FileSpecList dylib_files;
2347 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2348 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2349 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2350 UUID image_uuid;
2351
2352 for (i = 0; i < m_header.ncmds; ++i) {
2353 const lldb::offset_t cmd_offset = offset;
2354 // Read in the load command and load command size
2355 llvm::MachO::load_command lc;
2356 if (m_data.GetU32(offset_ptr: &offset, dst: &lc, count: 2) == nullptr)
2357 break;
2358 // Watch for the symbol table load command
2359 switch (lc.cmd) {
2360 case LC_SYMTAB:
2361 // struct symtab_command {
2362 // uint32_t cmd; /* LC_SYMTAB */
2363 // uint32_t cmdsize; /* sizeof(struct symtab_command) */
2364 // uint32_t symoff; /* symbol table offset */
2365 // uint32_t nsyms; /* number of symbol table entries */
2366 // uint32_t stroff; /* string table offset */
2367 // uint32_t strsize; /* string table size in bytes */
2368 // };
2369 symtab_load_command.cmd = lc.cmd;
2370 symtab_load_command.cmdsize = lc.cmdsize;
2371 symtab_load_command.symoff = m_data.GetU32(offset_ptr: &offset);
2372 symtab_load_command.nsyms = m_data.GetU32(offset_ptr: &offset);
2373 symtab_load_command.stroff = m_data.GetU32(offset_ptr: &offset);
2374 symtab_load_command.strsize = m_data.GetU32(offset_ptr: &offset);
2375 break;
2376
2377 case LC_DYLD_INFO:
2378 case LC_DYLD_INFO_ONLY:
2379 if (m_data.GetU32(offset_ptr: &offset, dst: &dyld_info.rebase_off, count: 10)) {
2380 dyld_info.cmd = lc.cmd;
2381 dyld_info.cmdsize = lc.cmdsize;
2382 } else {
2383 memset(s: &dyld_info, c: 0, n: sizeof(dyld_info));
2384 }
2385 break;
2386
2387 case LC_LOAD_DYLIB:
2388 case LC_LOAD_WEAK_DYLIB:
2389 case LC_REEXPORT_DYLIB:
2390 case LC_LOADFVMLIB:
2391 case LC_LOAD_UPWARD_DYLIB: {
2392 uint32_t name_offset = cmd_offset + m_data.GetU32(offset_ptr: &offset);
2393 const char *path = m_data.PeekCStr(offset: name_offset);
2394 if (path) {
2395 FileSpec file_spec(path);
2396 // Strip the path if there is @rpath, @executable, etc so we just use
2397 // the basename
2398 if (path[0] == '@')
2399 file_spec.ClearDirectory();
2400
2401 if (lc.cmd == LC_REEXPORT_DYLIB) {
2402 m_reexported_dylibs.AppendIfUnique(file: file_spec);
2403 }
2404
2405 dylib_files.Append(file: file_spec);
2406 }
2407 } break;
2408
2409 case LC_DYLD_EXPORTS_TRIE:
2410 exports_trie_load_command.cmd = lc.cmd;
2411 exports_trie_load_command.cmdsize = lc.cmdsize;
2412 if (m_data.GetU32(offset_ptr: &offset, dst: &exports_trie_load_command.dataoff, count: 2) ==
2413 nullptr) // fill in offset and size fields
2414 memset(s: &exports_trie_load_command, c: 0,
2415 n: sizeof(exports_trie_load_command));
2416 break;
2417 case LC_FUNCTION_STARTS:
2418 function_starts_load_command.cmd = lc.cmd;
2419 function_starts_load_command.cmdsize = lc.cmdsize;
2420 if (m_data.GetU32(offset_ptr: &offset, dst: &function_starts_load_command.dataoff, count: 2) ==
2421 nullptr) // fill in data offset and size fields
2422 memset(s: &function_starts_load_command, c: 0,
2423 n: sizeof(function_starts_load_command));
2424 break;
2425
2426 case LC_UUID: {
2427 const uint8_t *uuid_bytes = m_data.PeekData(offset, length: 16);
2428
2429 if (uuid_bytes)
2430 image_uuid = UUID(uuid_bytes, 16);
2431 break;
2432 }
2433
2434 default:
2435 break;
2436 }
2437 offset = cmd_offset + lc.cmdsize;
2438 }
2439
2440 if (!symtab_load_command.cmd)
2441 return;
2442
2443 SectionList *section_list = GetSectionList();
2444 if (section_list == nullptr)
2445 return;
2446
2447 const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2448 const ByteOrder byte_order = m_data.GetByteOrder();
2449 bool bit_width_32 = addr_byte_size == 4;
2450 const size_t nlist_byte_size =
2451 bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2452
2453 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2454 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2455 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2456 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2457 addr_byte_size);
2458 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2459
2460 const addr_t nlist_data_byte_size =
2461 symtab_load_command.nsyms * nlist_byte_size;
2462 const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2463 addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2464
2465 ProcessSP process_sp(m_process_wp.lock());
2466 Process *process = process_sp.get();
2467
2468 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2469 bool is_shared_cache_image = IsSharedCacheBinary();
2470 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2471
2472 ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2473 ConstString g_segment_name_DATA = GetSegmentNameDATA();
2474 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2475 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2476 ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2477 ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2478 ConstString g_section_name_lldb_no_nlist = GetSectionNameLLDBNoNlist();
2479 SectionSP text_section_sp(
2480 section_list->FindSectionByName(section_dstr: g_segment_name_TEXT));
2481 SectionSP data_section_sp(
2482 section_list->FindSectionByName(section_dstr: g_segment_name_DATA));
2483 SectionSP linkedit_section_sp(
2484 section_list->FindSectionByName(section_dstr: GetSegmentNameLINKEDIT()));
2485 SectionSP data_dirty_section_sp(
2486 section_list->FindSectionByName(section_dstr: g_segment_name_DATA_DIRTY));
2487 SectionSP data_const_section_sp(
2488 section_list->FindSectionByName(section_dstr: g_segment_name_DATA_CONST));
2489 SectionSP objc_section_sp(
2490 section_list->FindSectionByName(section_dstr: g_segment_name_OBJC));
2491 SectionSP eh_frame_section_sp;
2492 SectionSP lldb_no_nlist_section_sp;
2493 if (text_section_sp.get()) {
2494 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2495 section_dstr: g_section_name_eh_frame);
2496 lldb_no_nlist_section_sp = text_section_sp->GetChildren().FindSectionByName(
2497 section_dstr: g_section_name_lldb_no_nlist);
2498 } else {
2499 eh_frame_section_sp =
2500 section_list->FindSectionByName(section_dstr: g_section_name_eh_frame);
2501 lldb_no_nlist_section_sp =
2502 section_list->FindSectionByName(section_dstr: g_section_name_lldb_no_nlist);
2503 }
2504
2505 if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2506 !is_local_shared_cache_image) {
2507 Target &target = process->GetTarget();
2508
2509 memory_module_load_level = target.GetMemoryModuleLoadLevel();
2510
2511 // If __TEXT,__lldb_no_nlist section is present in this binary,
2512 // and we're reading it out of memory, do not read any of the
2513 // nlist entries. They are not needed in lldb and it may be
2514 // expensive to load these. This is to handle a dylib consisting
2515 // of only metadata, no code, but it has many nlist entries.
2516 if (lldb_no_nlist_section_sp)
2517 memory_module_load_level = eMemoryModuleLoadLevelMinimal;
2518
2519 // Reading mach file from memory in a process or core file...
2520
2521 if (linkedit_section_sp) {
2522 addr_t linkedit_load_addr =
2523 linkedit_section_sp->GetLoadBaseAddress(target: &target);
2524 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2525 // We might be trying to access the symbol table before the
2526 // __LINKEDIT's load address has been set in the target. We can't
2527 // fail to read the symbol table, so calculate the right address
2528 // manually
2529 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2530 mach_header_load_address: m_memory_addr, mach_header_section: GetMachHeaderSection(), section: linkedit_section_sp.get());
2531 }
2532
2533 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2534 const addr_t symoff_addr = linkedit_load_addr +
2535 symtab_load_command.symoff -
2536 linkedit_file_offset;
2537 strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2538 linkedit_file_offset;
2539
2540 // Always load dyld - the dynamic linker - from memory if we didn't
2541 // find a binary anywhere else. lldb will not register
2542 // dylib/framework/bundle loads/unloads if we don't have the dyld
2543 // symbols, we force dyld to load from memory despite the user's
2544 // target.memory-module-load-level setting.
2545 if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2546 m_header.filetype == llvm::MachO::MH_DYLINKER) {
2547 DataBufferSP nlist_data_sp(
2548 ReadMemory(process_sp, addr: symoff_addr, byte_size: nlist_data_byte_size));
2549 if (nlist_data_sp)
2550 nlist_data.SetData(data_sp: nlist_data_sp, offset: 0, length: nlist_data_sp->GetByteSize());
2551 if (dysymtab.nindirectsyms != 0) {
2552 const addr_t indirect_syms_addr = linkedit_load_addr +
2553 dysymtab.indirectsymoff -
2554 linkedit_file_offset;
2555 DataBufferSP indirect_syms_data_sp(ReadMemory(
2556 process_sp, addr: indirect_syms_addr, byte_size: dysymtab.nindirectsyms * 4));
2557 if (indirect_syms_data_sp)
2558 indirect_symbol_index_data.SetData(
2559 data_sp: indirect_syms_data_sp, offset: 0, length: indirect_syms_data_sp->GetByteSize());
2560 // If this binary is outside the shared cache,
2561 // cache the string table.
2562 // Binaries in the shared cache all share a giant string table,
2563 // and we can't share the string tables across multiple
2564 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2565 // for every binary in the shared cache - it would be a big perf
2566 // problem. For binaries outside the shared cache, it's faster to
2567 // read the entire strtab at once instead of piece-by-piece as we
2568 // process the nlist records.
2569 if (!is_shared_cache_image) {
2570 DataBufferSP strtab_data_sp(
2571 ReadMemory(process_sp, addr: strtab_addr, byte_size: strtab_data_byte_size));
2572 if (strtab_data_sp) {
2573 strtab_data.SetData(data_sp: strtab_data_sp, offset: 0,
2574 length: strtab_data_sp->GetByteSize());
2575 }
2576 }
2577 }
2578 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2579 if (function_starts_load_command.cmd) {
2580 const addr_t func_start_addr =
2581 linkedit_load_addr + function_starts_load_command.dataoff -
2582 linkedit_file_offset;
2583 DataBufferSP func_start_data_sp(
2584 ReadMemory(process_sp, addr: func_start_addr,
2585 byte_size: function_starts_load_command.datasize));
2586 if (func_start_data_sp)
2587 function_starts_data.SetData(data_sp: func_start_data_sp, offset: 0,
2588 length: func_start_data_sp->GetByteSize());
2589 }
2590 }
2591 }
2592 }
2593 } else {
2594 if (is_local_shared_cache_image) {
2595 // The load commands in shared cache images are relative to the
2596 // beginning of the shared cache, not the library image. The
2597 // data we get handed when creating the ObjectFileMachO starts
2598 // at the beginning of a specific library and spans to the end
2599 // of the cache to be able to reach the shared LINKEDIT
2600 // segments. We need to convert the load command offsets to be
2601 // relative to the beginning of our specific image.
2602 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2603 lldb::offset_t linkedit_slide =
2604 linkedit_offset - m_linkedit_original_offset;
2605 symtab_load_command.symoff += linkedit_slide;
2606 symtab_load_command.stroff += linkedit_slide;
2607 dyld_info.export_off += linkedit_slide;
2608 dysymtab.indirectsymoff += linkedit_slide;
2609 function_starts_load_command.dataoff += linkedit_slide;
2610 exports_trie_load_command.dataoff += linkedit_slide;
2611 }
2612
2613 nlist_data.SetData(data: m_data, offset: symtab_load_command.symoff,
2614 length: nlist_data_byte_size);
2615 strtab_data.SetData(data: m_data, offset: symtab_load_command.stroff,
2616 length: strtab_data_byte_size);
2617
2618 // We shouldn't have exports data from both the LC_DYLD_INFO command
2619 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2620 lldbassert(!((dyld_info.export_size > 0)
2621 && (exports_trie_load_command.datasize > 0)));
2622 if (dyld_info.export_size > 0) {
2623 dyld_trie_data.SetData(data: m_data, offset: dyld_info.export_off,
2624 length: dyld_info.export_size);
2625 } else if (exports_trie_load_command.datasize > 0) {
2626 dyld_trie_data.SetData(data: m_data, offset: exports_trie_load_command.dataoff,
2627 length: exports_trie_load_command.datasize);
2628 }
2629
2630 if (dysymtab.nindirectsyms != 0) {
2631 indirect_symbol_index_data.SetData(data: m_data, offset: dysymtab.indirectsymoff,
2632 length: dysymtab.nindirectsyms * 4);
2633 }
2634 if (function_starts_load_command.cmd) {
2635 function_starts_data.SetData(data: m_data, offset: function_starts_load_command.dataoff,
2636 length: function_starts_load_command.datasize);
2637 }
2638 }
2639
2640 const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2641
2642 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2643 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2644
2645 // lldb works best if it knows the start address of all functions in a
2646 // module. Linker symbols or debug info are normally the best source of
2647 // information for start addr / size but they may be stripped in a released
2648 // binary. Two additional sources of information exist in Mach-O binaries:
2649 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2650 // function's start address in the
2651 // binary, relative to the text section.
2652 // eh_frame - the eh_frame FDEs have the start addr & size of
2653 // each function
2654 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2655 // all modern binaries.
2656 // Binaries built to run on older releases may need to use eh_frame
2657 // information.
2658
2659 if (text_section_sp && function_starts_data.GetByteSize()) {
2660 FunctionStarts::Entry function_start_entry;
2661 function_start_entry.data = false;
2662 lldb::offset_t function_start_offset = 0;
2663 function_start_entry.addr = text_section_sp->GetFileAddress();
2664 uint64_t delta;
2665 while ((delta = function_starts_data.GetULEB128(offset_ptr: &function_start_offset)) >
2666 0) {
2667 // Now append the current entry
2668 function_start_entry.addr += delta;
2669 if (is_arm) {
2670 if (function_start_entry.addr & 1) {
2671 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2672 function_start_entry.data = true;
2673 } else if (always_thumb) {
2674 function_start_entry.data = true;
2675 }
2676 }
2677 function_starts.Append(entry: function_start_entry);
2678 }
2679 } else {
2680 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2681 // load command claiming an eh_frame but it doesn't actually have the
2682 // eh_frame content. And if we have a dSYM, we don't need to do any of
2683 // this fill-in-the-missing-symbols works anyway - the debug info should
2684 // give us all the functions in the module.
2685 if (text_section_sp.get() && eh_frame_section_sp.get() &&
2686 m_type != eTypeDebugInfo) {
2687 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2688 DWARFCallFrameInfo::EH);
2689 DWARFCallFrameInfo::FunctionAddressAndSizeVector functions;
2690 eh_frame.GetFunctionAddressAndSizeVector(function_info&: functions);
2691 addr_t text_base_addr = text_section_sp->GetFileAddress();
2692 size_t count = functions.GetSize();
2693 for (size_t i = 0; i < count; ++i) {
2694 const DWARFCallFrameInfo::FunctionAddressAndSizeVector::Entry *func =
2695 functions.GetEntryAtIndex(i);
2696 if (func) {
2697 FunctionStarts::Entry function_start_entry;
2698 function_start_entry.addr = func->base - text_base_addr;
2699 if (is_arm) {
2700 if (function_start_entry.addr & 1) {
2701 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2702 function_start_entry.data = true;
2703 } else if (always_thumb) {
2704 function_start_entry.data = true;
2705 }
2706 }
2707 function_starts.Append(entry: function_start_entry);
2708 }
2709 }
2710 }
2711 }
2712
2713 const size_t function_starts_count = function_starts.GetSize();
2714
2715 // For user process binaries (executables, dylibs, frameworks, bundles), if
2716 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2717 // going to assume the binary has been stripped. Don't allow assembly
2718 // language instruction emulation because we don't know proper function
2719 // start boundaries.
2720 //
2721 // For all other types of binaries (kernels, stand-alone bare board
2722 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2723 // sections - we should not make any assumptions about them based on that.
2724 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2725 m_allow_assembly_emulation_unwind_plans = false;
2726 Log *unwind_or_symbol_log(GetLog(mask: LLDBLog::Symbols | LLDBLog::Unwind));
2727
2728 if (unwind_or_symbol_log)
2729 module_sp->LogMessage(
2730 log: unwind_or_symbol_log,
2731 format: "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2732 }
2733
2734 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2735 ? eh_frame_section_sp->GetID()
2736 : static_cast<user_id_t>(NO_SECT);
2737
2738 uint32_t N_SO_index = UINT32_MAX;
2739
2740 MachSymtabSectionInfo section_info(section_list);
2741 std::vector<uint32_t> N_FUN_indexes;
2742 std::vector<uint32_t> N_NSYM_indexes;
2743 std::vector<uint32_t> N_INCL_indexes;
2744 std::vector<uint32_t> N_BRAC_indexes;
2745 std::vector<uint32_t> N_COMM_indexes;
2746 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2747 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2748 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2749 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2750 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2751 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2752 // Any symbols that get merged into another will get an entry in this map
2753 // so we know
2754 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2755 uint32_t nlist_idx = 0;
2756 Symbol *symbol_ptr = nullptr;
2757
2758 uint32_t sym_idx = 0;
2759 Symbol *sym = nullptr;
2760 size_t num_syms = 0;
2761 std::string memory_symbol_name;
2762 uint32_t unmapped_local_symbols_found = 0;
2763
2764 std::vector<TrieEntryWithOffset> reexport_trie_entries;
2765 std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2766 std::set<lldb::addr_t> resolver_addresses;
2767
2768 const size_t dyld_trie_data_size = dyld_trie_data.GetByteSize();
2769 if (dyld_trie_data_size > 0) {
2770 LLDB_LOG(log, "Parsing {0} bytes of dyld trie data", dyld_trie_data_size);
2771 SectionSP text_segment_sp =
2772 GetSectionList()->FindSectionByName(section_dstr: GetSegmentNameTEXT());
2773 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2774 if (text_segment_sp)
2775 text_segment_file_addr = text_segment_sp->GetFileAddress();
2776 std::vector<llvm::StringRef> nameSlices;
2777 ParseTrieEntries(data&: dyld_trie_data, offset: 0, is_arm, text_seg_base_addr: text_segment_file_addr,
2778 nameSlices, resolver_addresses, reexports&: reexport_trie_entries,
2779 ext_symbols&: external_sym_trie_entries);
2780 }
2781
2782 typedef std::set<ConstString> IndirectSymbols;
2783 IndirectSymbols indirect_symbol_names;
2784
2785#if TARGET_OS_IPHONE
2786
2787 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2788 // optimized by moving LOCAL symbols out of the memory mapped portion of
2789 // the DSC. The symbol information has all been retained, but it isn't
2790 // available in the normal nlist data. However, there *are* duplicate
2791 // entries of *some*
2792 // LOCAL symbols in the normal nlist data. To handle this situation
2793 // correctly, we must first attempt
2794 // to parse any DSC unmapped symbol information. If we find any, we set a
2795 // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2796
2797 if (IsSharedCacheBinary()) {
2798 // Before we can start mapping the DSC, we need to make certain the
2799 // target process is actually using the cache we can find.
2800
2801 // Next we need to determine the correct path for the dyld shared cache.
2802
2803 ArchSpec header_arch = GetArchitecture();
2804
2805 UUID dsc_uuid;
2806 UUID process_shared_cache_uuid;
2807 addr_t process_shared_cache_base_addr;
2808
2809 if (process) {
2810 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2811 process_shared_cache_uuid);
2812 }
2813
2814 __block bool found_image = false;
2815 __block void *nlist_buffer = nullptr;
2816 __block unsigned nlist_count = 0;
2817 __block char *string_table = nullptr;
2818 __block vm_offset_t vm_nlist_memory = 0;
2819 __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
2820 __block vm_offset_t vm_string_memory = 0;
2821 __block mach_msg_type_number_t vm_string_bytes_read = 0;
2822
2823 auto _ = llvm::make_scope_exit(^{
2824 if (vm_nlist_memory)
2825 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
2826 if (vm_string_memory)
2827 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
2828 });
2829
2830 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2831 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2832 UndefinedNameToDescMap undefined_name_to_desc;
2833 SymbolIndexToName reexport_shlib_needs_fixup;
2834
2835 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
2836 uuid_t cache_uuid;
2837 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
2838 if (found_image)
2839 return;
2840
2841 if (process_shared_cache_uuid.IsValid() &&
2842 process_shared_cache_uuid != UUID(&cache_uuid, 16))
2843 return;
2844
2845 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
2846 uuid_t dsc_image_uuid;
2847 if (found_image)
2848 return;
2849
2850 dyld_image_copy_uuid(image, &dsc_image_uuid);
2851 if (image_uuid != UUID(dsc_image_uuid, 16))
2852 return;
2853
2854 found_image = true;
2855
2856 // Compute the size of the string table. We need to ask dyld for a
2857 // new SPI to avoid this step.
2858 dyld_image_local_nlist_content_4Symbolication(
2859 image, ^(const void *nlistStart, uint64_t nlistCount,
2860 const char *stringTable) {
2861 if (!nlistStart || !nlistCount)
2862 return;
2863
2864 // The buffers passed here are valid only inside the block.
2865 // Use vm_read to make a cheap copy of them available for our
2866 // processing later.
2867 kern_return_t ret =
2868 vm_read(mach_task_self(), (vm_address_t)nlistStart,
2869 nlist_byte_size * nlistCount, &vm_nlist_memory,
2870 &vm_nlist_bytes_read);
2871 if (ret != KERN_SUCCESS)
2872 return;
2873 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
2874
2875 // We don't know the size of the string table. It's cheaper
2876 // to map the whole VM region than to determine the size by
2877 // parsing all the nlist entries.
2878 vm_address_t string_address = (vm_address_t)stringTable;
2879 vm_size_t region_size;
2880 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
2881 vm_region_basic_info_data_t info;
2882 memory_object_name_t object;
2883 ret = vm_region_64(mach_task_self(), &string_address,
2884 &region_size, VM_REGION_BASIC_INFO_64,
2885 (vm_region_info_t)&info, &info_count, &object);
2886 if (ret != KERN_SUCCESS)
2887 return;
2888
2889 ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
2890 region_size -
2891 ((vm_address_t)stringTable - string_address),
2892 &vm_string_memory, &vm_string_bytes_read);
2893 if (ret != KERN_SUCCESS)
2894 return;
2895
2896 nlist_buffer = (void *)vm_nlist_memory;
2897 string_table = (char *)vm_string_memory;
2898 nlist_count = nlistCount;
2899 });
2900 });
2901 });
2902 if (nlist_buffer) {
2903 DataExtractor dsc_local_symbols_data(nlist_buffer,
2904 nlist_count * nlist_byte_size,
2905 byte_order, addr_byte_size);
2906 unmapped_local_symbols_found = nlist_count;
2907
2908 // The normal nlist code cannot correctly size the Symbols
2909 // array, we need to allocate it here.
2910 sym = symtab.Resize(
2911 symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2912 unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2913 num_syms = symtab.GetNumSymbols();
2914
2915 lldb::offset_t nlist_data_offset = 0;
2916
2917 for (uint32_t nlist_index = 0;
2918 nlist_index < nlist_count;
2919 nlist_index++) {
2920 /////////////////////////////
2921 {
2922 std::optional<struct nlist_64> nlist_maybe =
2923 ParseNList(dsc_local_symbols_data, nlist_data_offset,
2924 nlist_byte_size);
2925 if (!nlist_maybe)
2926 break;
2927 struct nlist_64 nlist = *nlist_maybe;
2928
2929 SymbolType type = eSymbolTypeInvalid;
2930 const char *symbol_name = string_table + nlist.n_strx;
2931
2932 if (symbol_name == NULL) {
2933 // No symbol should be NULL, even the symbols with no
2934 // string values should have an offset zero which
2935 // points to an empty C-string
2936 Debugger::ReportError(llvm::formatv(
2937 "DSC unmapped local symbol[{0}] has invalid "
2938 "string table offset {1:x} in {2}, ignoring symbol",
2939 nlist_index, nlist.n_strx,
2940 module_sp->GetFileSpec().GetPath()));
2941 continue;
2942 }
2943 if (symbol_name[0] == '\0')
2944 symbol_name = NULL;
2945
2946 const char *symbol_name_non_abi_mangled = NULL;
2947
2948 SectionSP symbol_section;
2949 uint32_t symbol_byte_size = 0;
2950 bool add_nlist = true;
2951 bool is_debug = ((nlist.n_type & N_STAB) != 0);
2952 bool demangled_is_synthesized = false;
2953 bool is_gsym = false;
2954 bool set_value = true;
2955
2956 assert(sym_idx < num_syms);
2957
2958 sym[sym_idx].SetDebug(is_debug);
2959
2960 if (is_debug) {
2961 switch (nlist.n_type) {
2962 case N_GSYM:
2963 // global symbol: name,,NO_SECT,type,0
2964 // Sometimes the N_GSYM value contains the address.
2965
2966 // FIXME: In the .o files, we have a GSYM and a debug
2967 // symbol for all the ObjC data. They
2968 // have the same address, but we want to ensure that
2969 // we always find only the real symbol, 'cause we
2970 // don't currently correctly attribute the
2971 // GSYM one to the ObjCClass/Ivar/MetaClass
2972 // symbol type. This is a temporary hack to make
2973 // sure the ObjectiveC symbols get treated correctly.
2974 // To do this right, we should coalesce all the GSYM
2975 // & global symbols that have the same address.
2976
2977 is_gsym = true;
2978 sym[sym_idx].SetExternal(true);
2979
2980 if (symbol_name && symbol_name[0] == '_' &&
2981 symbol_name[1] == 'O') {
2982 llvm::StringRef symbol_name_ref(symbol_name);
2983 if (symbol_name_ref.starts_with(
2984 g_objc_v2_prefix_class)) {
2985 symbol_name_non_abi_mangled = symbol_name + 1;
2986 symbol_name =
2987 symbol_name + g_objc_v2_prefix_class.size();
2988 type = eSymbolTypeObjCClass;
2989 demangled_is_synthesized = true;
2990
2991 } else if (symbol_name_ref.starts_with(
2992 g_objc_v2_prefix_metaclass)) {
2993 symbol_name_non_abi_mangled = symbol_name + 1;
2994 symbol_name =
2995 symbol_name + g_objc_v2_prefix_metaclass.size();
2996 type = eSymbolTypeObjCMetaClass;
2997 demangled_is_synthesized = true;
2998 } else if (symbol_name_ref.starts_with(
2999 g_objc_v2_prefix_ivar)) {
3000 symbol_name_non_abi_mangled = symbol_name + 1;
3001 symbol_name =
3002 symbol_name + g_objc_v2_prefix_ivar.size();
3003 type = eSymbolTypeObjCIVar;
3004 demangled_is_synthesized = true;
3005 }
3006 } else {
3007 if (nlist.n_value != 0)
3008 symbol_section = section_info.GetSection(
3009 nlist.n_sect, nlist.n_value);
3010 type = eSymbolTypeData;
3011 }
3012 break;
3013
3014 case N_FNAME:
3015 // procedure name (f77 kludge): name,,NO_SECT,0,0
3016 type = eSymbolTypeCompiler;
3017 break;
3018
3019 case N_FUN:
3020 // procedure: name,,n_sect,linenumber,address
3021 if (symbol_name) {
3022 type = eSymbolTypeCode;
3023 symbol_section = section_info.GetSection(
3024 nlist.n_sect, nlist.n_value);
3025
3026 N_FUN_addr_to_sym_idx.insert(
3027 std::make_pair(nlist.n_value, sym_idx));
3028 // We use the current number of symbols in the
3029 // symbol table in lieu of using nlist_idx in case
3030 // we ever start trimming entries out
3031 N_FUN_indexes.push_back(sym_idx);
3032 } else {
3033 type = eSymbolTypeCompiler;
3034
3035 if (!N_FUN_indexes.empty()) {
3036 // Copy the size of the function into the
3037 // original
3038 // STAB entry so we don't have
3039 // to hunt for it later
3040 symtab.SymbolAtIndex(N_FUN_indexes.back())
3041 ->SetByteSize(nlist.n_value);
3042 N_FUN_indexes.pop_back();
3043 // We don't really need the end function STAB as
3044 // it contains the size which we already placed
3045 // with the original symbol, so don't add it if
3046 // we want a minimal symbol table
3047 add_nlist = false;
3048 }
3049 }
3050 break;
3051
3052 case N_STSYM:
3053 // static symbol: name,,n_sect,type,address
3054 N_STSYM_addr_to_sym_idx.insert(
3055 std::make_pair(nlist.n_value, sym_idx));
3056 symbol_section = section_info.GetSection(nlist.n_sect,
3057 nlist.n_value);
3058 if (symbol_name && symbol_name[0]) {
3059 type = ObjectFile::GetSymbolTypeFromName(
3060 symbol_name + 1, eSymbolTypeData);
3061 }
3062 break;
3063
3064 case N_LCSYM:
3065 // .lcomm symbol: name,,n_sect,type,address
3066 symbol_section = section_info.GetSection(nlist.n_sect,
3067 nlist.n_value);
3068 type = eSymbolTypeCommonBlock;
3069 break;
3070
3071 case N_BNSYM:
3072 // We use the current number of symbols in the symbol
3073 // table in lieu of using nlist_idx in case we ever
3074 // start trimming entries out Skip these if we want
3075 // minimal symbol tables
3076 add_nlist = false;
3077 break;
3078
3079 case N_ENSYM:
3080 // Set the size of the N_BNSYM to the terminating
3081 // index of this N_ENSYM so that we can always skip
3082 // the entire symbol if we need to navigate more
3083 // quickly at the source level when parsing STABS
3084 // Skip these if we want minimal symbol tables
3085 add_nlist = false;
3086 break;
3087
3088 case N_OPT:
3089 // emitted with gcc2_compiled and in gcc source
3090 type = eSymbolTypeCompiler;
3091 break;
3092
3093 case N_RSYM:
3094 // register sym: name,,NO_SECT,type,register
3095 type = eSymbolTypeVariable;
3096 break;
3097
3098 case N_SLINE:
3099 // src line: 0,,n_sect,linenumber,address
3100 symbol_section = section_info.GetSection(nlist.n_sect,
3101 nlist.n_value);
3102 type = eSymbolTypeLineEntry;
3103 break;
3104
3105 case N_SSYM:
3106 // structure elt: name,,NO_SECT,type,struct_offset
3107 type = eSymbolTypeVariableType;
3108 break;
3109
3110 case N_SO:
3111 // source file name
3112 type = eSymbolTypeSourceFile;
3113 if (symbol_name == NULL) {
3114 add_nlist = false;
3115 if (N_SO_index != UINT32_MAX) {
3116 // Set the size of the N_SO to the terminating
3117 // index of this N_SO so that we can always skip
3118 // the entire N_SO if we need to navigate more
3119 // quickly at the source level when parsing STABS
3120 symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3121 symbol_ptr->SetByteSize(sym_idx);
3122 symbol_ptr->SetSizeIsSibling(true);
3123 }
3124 N_NSYM_indexes.clear();
3125 N_INCL_indexes.clear();
3126 N_BRAC_indexes.clear();
3127 N_COMM_indexes.clear();
3128 N_FUN_indexes.clear();
3129 N_SO_index = UINT32_MAX;
3130 } else {
3131 // We use the current number of symbols in the
3132 // symbol table in lieu of using nlist_idx in case
3133 // we ever start trimming entries out
3134 const bool N_SO_has_full_path = symbol_name[0] == '/';
3135 if (N_SO_has_full_path) {
3136 if ((N_SO_index == sym_idx - 1) &&
3137 ((sym_idx - 1) < num_syms)) {
3138 // We have two consecutive N_SO entries where
3139 // the first contains a directory and the
3140 // second contains a full path.
3141 sym[sym_idx - 1].GetMangled().SetValue(
3142 ConstString(symbol_name));
3143 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3144 add_nlist = false;
3145 } else {
3146 // This is the first entry in a N_SO that
3147 // contains a directory or
3148 // a full path to the source file
3149 N_SO_index = sym_idx;
3150 }
3151 } else if ((N_SO_index == sym_idx - 1) &&
3152 ((sym_idx - 1) < num_syms)) {
3153 // This is usually the second N_SO entry that
3154 // contains just the filename, so here we combine
3155 // it with the first one if we are minimizing the
3156 // symbol table
3157 const char *so_path = sym[sym_idx - 1]
3158 .GetMangled()
3159 .GetDemangledName()
3160 .AsCString();
3161 if (so_path && so_path[0]) {
3162 std::string full_so_path(so_path);
3163 const size_t double_slash_pos =
3164 full_so_path.find("//");
3165 if (double_slash_pos != std::string::npos) {
3166 // The linker has been generating bad N_SO
3167 // entries with doubled up paths
3168 // in the format "%s%s" where the first
3169 // string in the DW_AT_comp_dir, and the
3170 // second is the directory for the source
3171 // file so you end up with a path that looks
3172 // like "/tmp/src//tmp/src/"
3173 FileSpec so_dir(so_path);
3174 if (!FileSystem::Instance().Exists(so_dir)) {
3175 so_dir.SetFile(
3176 &full_so_path[double_slash_pos + 1],
3177 FileSpec::Style::native);
3178 if (FileSystem::Instance().Exists(so_dir)) {
3179 // Trim off the incorrect path
3180 full_so_path.erase(0, double_slash_pos + 1);
3181 }
3182 }
3183 }
3184 if (*full_so_path.rbegin() != '/')
3185 full_so_path += '/';
3186 full_so_path += symbol_name;
3187 sym[sym_idx - 1].GetMangled().SetValue(
3188 ConstString(full_so_path.c_str()));
3189 add_nlist = false;
3190 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3191 }
3192 } else {
3193 // This could be a relative path to a N_SO
3194 N_SO_index = sym_idx;
3195 }
3196 }
3197 break;
3198
3199 case N_OSO:
3200 // object file name: name,,0,0,st_mtime
3201 type = eSymbolTypeObjectFile;
3202 break;
3203
3204 case N_LSYM:
3205 // local sym: name,,NO_SECT,type,offset
3206 type = eSymbolTypeLocal;
3207 break;
3208
3209 // INCL scopes
3210 case N_BINCL:
3211 // include file beginning: name,,NO_SECT,0,sum We use
3212 // the current number of symbols in the symbol table
3213 // in lieu of using nlist_idx in case we ever start
3214 // trimming entries out
3215 N_INCL_indexes.push_back(sym_idx);
3216 type = eSymbolTypeScopeBegin;
3217 break;
3218
3219 case N_EINCL:
3220 // include file end: name,,NO_SECT,0,0
3221 // Set the size of the N_BINCL to the terminating
3222 // index of this N_EINCL so that we can always skip
3223 // the entire symbol if we need to navigate more
3224 // quickly at the source level when parsing STABS
3225 if (!N_INCL_indexes.empty()) {
3226 symbol_ptr =
3227 symtab.SymbolAtIndex(N_INCL_indexes.back());
3228 symbol_ptr->SetByteSize(sym_idx + 1);
3229 symbol_ptr->SetSizeIsSibling(true);
3230 N_INCL_indexes.pop_back();
3231 }
3232 type = eSymbolTypeScopeEnd;
3233 break;
3234
3235 case N_SOL:
3236 // #included file name: name,,n_sect,0,address
3237 type = eSymbolTypeHeaderFile;
3238
3239 // We currently don't use the header files on darwin
3240 add_nlist = false;
3241 break;
3242
3243 case N_PARAMS:
3244 // compiler parameters: name,,NO_SECT,0,0
3245 type = eSymbolTypeCompiler;
3246 break;
3247
3248 case N_VERSION:
3249 // compiler version: name,,NO_SECT,0,0
3250 type = eSymbolTypeCompiler;
3251 break;
3252
3253 case N_OLEVEL:
3254 // compiler -O level: name,,NO_SECT,0,0
3255 type = eSymbolTypeCompiler;
3256 break;
3257
3258 case N_PSYM:
3259 // parameter: name,,NO_SECT,type,offset
3260 type = eSymbolTypeVariable;
3261 break;
3262
3263 case N_ENTRY:
3264 // alternate entry: name,,n_sect,linenumber,address
3265 symbol_section = section_info.GetSection(nlist.n_sect,
3266 nlist.n_value);
3267 type = eSymbolTypeLineEntry;
3268 break;
3269
3270 // Left and Right Braces
3271 case N_LBRAC:
3272 // left bracket: 0,,NO_SECT,nesting level,address We
3273 // use the current number of symbols in the symbol
3274 // table in lieu of using nlist_idx in case we ever
3275 // start trimming entries out
3276 symbol_section = section_info.GetSection(nlist.n_sect,
3277 nlist.n_value);
3278 N_BRAC_indexes.push_back(sym_idx);
3279 type = eSymbolTypeScopeBegin;
3280 break;
3281
3282 case N_RBRAC:
3283 // right bracket: 0,,NO_SECT,nesting level,address
3284 // Set the size of the N_LBRAC to the terminating
3285 // index of this N_RBRAC so that we can always skip
3286 // the entire symbol if we need to navigate more
3287 // quickly at the source level when parsing STABS
3288 symbol_section = section_info.GetSection(nlist.n_sect,
3289 nlist.n_value);
3290 if (!N_BRAC_indexes.empty()) {
3291 symbol_ptr =
3292 symtab.SymbolAtIndex(N_BRAC_indexes.back());
3293 symbol_ptr->SetByteSize(sym_idx + 1);
3294 symbol_ptr->SetSizeIsSibling(true);
3295 N_BRAC_indexes.pop_back();
3296 }
3297 type = eSymbolTypeScopeEnd;
3298 break;
3299
3300 case N_EXCL:
3301 // deleted include file: name,,NO_SECT,0,sum
3302 type = eSymbolTypeHeaderFile;
3303 break;
3304
3305 // COMM scopes
3306 case N_BCOMM:
3307 // begin common: name,,NO_SECT,0,0
3308 // We use the current number of symbols in the symbol
3309 // table in lieu of using nlist_idx in case we ever
3310 // start trimming entries out
3311 type = eSymbolTypeScopeBegin;
3312 N_COMM_indexes.push_back(sym_idx);
3313 break;
3314
3315 case N_ECOML:
3316 // end common (local name): 0,,n_sect,0,address
3317 symbol_section = section_info.GetSection(nlist.n_sect,
3318 nlist.n_value);
3319 // Fall through
3320
3321 case N_ECOMM:
3322 // end common: name,,n_sect,0,0
3323 // Set the size of the N_BCOMM to the terminating
3324 // index of this N_ECOMM/N_ECOML so that we can
3325 // always skip the entire symbol if we need to
3326 // navigate more quickly at the source level when
3327 // parsing STABS
3328 if (!N_COMM_indexes.empty()) {
3329 symbol_ptr =
3330 symtab.SymbolAtIndex(N_COMM_indexes.back());
3331 symbol_ptr->SetByteSize(sym_idx + 1);
3332 symbol_ptr->SetSizeIsSibling(true);
3333 N_COMM_indexes.pop_back();
3334 }
3335 type = eSymbolTypeScopeEnd;
3336 break;
3337
3338 case N_LENG:
3339 // second stab entry with length information
3340 type = eSymbolTypeAdditional;
3341 break;
3342
3343 default:
3344 break;
3345 }
3346 } else {
3347 // uint8_t n_pext = N_PEXT & nlist.n_type;
3348 uint8_t n_type = N_TYPE & nlist.n_type;
3349 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3350
3351 switch (n_type) {
3352 case N_INDR: {
3353 const char *reexport_name_cstr =
3354 strtab_data.PeekCStr(nlist.n_value);
3355 if (reexport_name_cstr && reexport_name_cstr[0]) {
3356 type = eSymbolTypeReExported;
3357 ConstString reexport_name(
3358 reexport_name_cstr +
3359 ((reexport_name_cstr[0] == '_') ? 1 : 0));
3360 sym[sym_idx].SetReExportedSymbolName(reexport_name);
3361 set_value = false;
3362 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3363 indirect_symbol_names.insert(ConstString(
3364 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3365 } else
3366 type = eSymbolTypeUndefined;
3367 } break;
3368
3369 case N_UNDF:
3370 if (symbol_name && symbol_name[0]) {
3371 ConstString undefined_name(
3372 symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3373 undefined_name_to_desc[undefined_name] = nlist.n_desc;
3374 }
3375 // Fall through
3376 case N_PBUD:
3377 type = eSymbolTypeUndefined;
3378 break;
3379
3380 case N_ABS:
3381 type = eSymbolTypeAbsolute;
3382 break;
3383
3384 case N_SECT: {
3385 symbol_section = section_info.GetSection(nlist.n_sect,
3386 nlist.n_value);
3387
3388 if (symbol_section == NULL) {
3389 // TODO: warn about this?
3390 add_nlist = false;
3391 break;
3392 }
3393
3394 if (TEXT_eh_frame_sectID == nlist.n_sect) {
3395 type = eSymbolTypeException;
3396 } else {
3397 uint32_t section_type =
3398 symbol_section->Get() & SECTION_TYPE;
3399
3400 switch (section_type) {
3401 case S_CSTRING_LITERALS:
3402 type = eSymbolTypeData;
3403 break; // section with only literal C strings
3404 case S_4BYTE_LITERALS:
3405 type = eSymbolTypeData;
3406 break; // section with only 4 byte literals
3407 case S_8BYTE_LITERALS:
3408 type = eSymbolTypeData;
3409 break; // section with only 8 byte literals
3410 case S_LITERAL_POINTERS:
3411 type = eSymbolTypeTrampoline;
3412 break; // section with only pointers to literals
3413 case S_NON_LAZY_SYMBOL_POINTERS:
3414 type = eSymbolTypeTrampoline;
3415 break; // section with only non-lazy symbol
3416 // pointers
3417 case S_LAZY_SYMBOL_POINTERS:
3418 type = eSymbolTypeTrampoline;
3419 break; // section with only lazy symbol pointers
3420 case S_SYMBOL_STUBS:
3421 type = eSymbolTypeTrampoline;
3422 break; // section with only symbol stubs, byte
3423 // size of stub in the reserved2 field
3424 case S_MOD_INIT_FUNC_POINTERS:
3425 type = eSymbolTypeCode;
3426 break; // section with only function pointers for
3427 // initialization
3428 case S_MOD_TERM_FUNC_POINTERS:
3429 type = eSymbolTypeCode;
3430 break; // section with only function pointers for
3431 // termination
3432 case S_INTERPOSING:
3433 type = eSymbolTypeTrampoline;
3434 break; // section with only pairs of function
3435 // pointers for interposing
3436 case S_16BYTE_LITERALS:
3437 type = eSymbolTypeData;
3438 break; // section with only 16 byte literals
3439 case S_DTRACE_DOF:
3440 type = eSymbolTypeInstrumentation;
3441 break;
3442 case S_LAZY_DYLIB_SYMBOL_POINTERS:
3443 type = eSymbolTypeTrampoline;
3444 break;
3445 default:
3446 switch (symbol_section->GetType()) {
3447 case lldb::eSectionTypeCode:
3448 type = eSymbolTypeCode;
3449 break;
3450 case eSectionTypeData:
3451 case eSectionTypeDataCString: // Inlined C string
3452 // data
3453 case eSectionTypeDataCStringPointers: // Pointers
3454 // to C
3455 // string
3456 // data
3457 case eSectionTypeDataSymbolAddress: // Address of
3458 // a symbol in
3459 // the symbol
3460 // table
3461 case eSectionTypeData4:
3462 case eSectionTypeData8:
3463 case eSectionTypeData16:
3464 type = eSymbolTypeData;
3465 break;
3466 default:
3467 break;
3468 }
3469 break;
3470 }
3471
3472 if (type == eSymbolTypeInvalid) {
3473 const char *symbol_sect_name =
3474 symbol_section->GetName().AsCString();
3475 if (symbol_section->IsDescendant(
3476 text_section_sp.get())) {
3477 if (symbol_section->IsClear(
3478 S_ATTR_PURE_INSTRUCTIONS |
3479 S_ATTR_SELF_MODIFYING_CODE |
3480 S_ATTR_SOME_INSTRUCTIONS))
3481 type = eSymbolTypeData;
3482 else
3483 type = eSymbolTypeCode;
3484 } else if (symbol_section->IsDescendant(
3485 data_section_sp.get()) ||
3486 symbol_section->IsDescendant(
3487 data_dirty_section_sp.get()) ||
3488 symbol_section->IsDescendant(
3489 data_const_section_sp.get())) {
3490 if (symbol_sect_name &&
3491 ::strstr(symbol_sect_name, "__objc") ==
3492 symbol_sect_name) {
3493 type = eSymbolTypeRuntime;
3494
3495 if (symbol_name) {
3496 llvm::StringRef symbol_name_ref(symbol_name);
3497 if (symbol_name_ref.starts_with("_OBJC_")) {
3498 llvm::StringRef
3499 g_objc_v2_prefix_class(
3500 "_OBJC_CLASS_$_");
3501 llvm::StringRef
3502 g_objc_v2_prefix_metaclass(
3503 "_OBJC_METACLASS_$_");
3504 llvm::StringRef
3505 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3506 if (symbol_name_ref.starts_with(
3507 g_objc_v2_prefix_class)) {
3508 symbol_name_non_abi_mangled =
3509 symbol_name + 1;
3510 symbol_name =
3511 symbol_name +
3512 g_objc_v2_prefix_class.size();
3513 type = eSymbolTypeObjCClass;
3514 demangled_is_synthesized = true;
3515 } else if (
3516 symbol_name_ref.starts_with(
3517 g_objc_v2_prefix_metaclass)) {
3518 symbol_name_non_abi_mangled =
3519 symbol_name + 1;
3520 symbol_name =
3521 symbol_name +
3522 g_objc_v2_prefix_metaclass.size();
3523 type = eSymbolTypeObjCMetaClass;
3524 demangled_is_synthesized = true;
3525 } else if (symbol_name_ref.starts_with(
3526 g_objc_v2_prefix_ivar)) {
3527 symbol_name_non_abi_mangled =
3528 symbol_name + 1;
3529 symbol_name =
3530 symbol_name +
3531 g_objc_v2_prefix_ivar.size();
3532 type = eSymbolTypeObjCIVar;
3533 demangled_is_synthesized = true;
3534 }
3535 }
3536 }
3537 } else if (symbol_sect_name &&
3538 ::strstr(symbol_sect_name,
3539 "__gcc_except_tab") ==
3540 symbol_sect_name) {
3541 type = eSymbolTypeException;
3542 } else {
3543 type = eSymbolTypeData;
3544 }
3545 } else if (symbol_sect_name &&
3546 ::strstr(symbol_sect_name, "__IMPORT") ==
3547 symbol_sect_name) {
3548 type = eSymbolTypeTrampoline;
3549 } else if (symbol_section->IsDescendant(
3550 objc_section_sp.get())) {
3551 type = eSymbolTypeRuntime;
3552 if (symbol_name && symbol_name[0] == '.') {
3553 llvm::StringRef symbol_name_ref(symbol_name);
3554 llvm::StringRef
3555 g_objc_v1_prefix_class(".objc_class_name_");
3556 if (symbol_name_ref.starts_with(
3557 g_objc_v1_prefix_class)) {
3558 symbol_name_non_abi_mangled = symbol_name;
3559 symbol_name = symbol_name +
3560 g_objc_v1_prefix_class.size();
3561 type = eSymbolTypeObjCClass;
3562 demangled_is_synthesized = true;
3563 }
3564 }
3565 }
3566 }
3567 }
3568 } break;
3569 }
3570 }
3571
3572 if (add_nlist) {
3573 uint64_t symbol_value = nlist.n_value;
3574 if (symbol_name_non_abi_mangled) {
3575 sym[sym_idx].GetMangled().SetMangledName(
3576 ConstString(symbol_name_non_abi_mangled));
3577 sym[sym_idx].GetMangled().SetDemangledName(
3578 ConstString(symbol_name));
3579 } else {
3580 if (symbol_name && symbol_name[0] == '_') {
3581 symbol_name++; // Skip the leading underscore
3582 }
3583
3584 if (symbol_name) {
3585 ConstString const_symbol_name(symbol_name);
3586 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
3587 if (is_gsym && is_debug) {
3588 const char *gsym_name =
3589 sym[sym_idx]
3590 .GetMangled()
3591 .GetName(Mangled::ePreferMangled)
3592 .GetCString();
3593 if (gsym_name)
3594 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3595 }
3596 }
3597 }
3598 if (symbol_section) {
3599 const addr_t section_file_addr =
3600 symbol_section->GetFileAddress();
3601 if (symbol_byte_size == 0 &&
3602 function_starts_count > 0) {
3603 addr_t symbol_lookup_file_addr = nlist.n_value;
3604 // Do an exact address match for non-ARM addresses,
3605 // else get the closest since the symbol might be a
3606 // thumb symbol which has an address with bit zero
3607 // set
3608 FunctionStarts::Entry *func_start_entry =
3609 function_starts.FindEntry(symbol_lookup_file_addr,
3610 !is_arm);
3611 if (is_arm && func_start_entry) {
3612 // Verify that the function start address is the
3613 // symbol address (ARM) or the symbol address + 1
3614 // (thumb)
3615 if (func_start_entry->addr !=
3616 symbol_lookup_file_addr &&
3617 func_start_entry->addr !=
3618 (symbol_lookup_file_addr + 1)) {
3619 // Not the right entry, NULL it out...
3620 func_start_entry = NULL;
3621 }
3622 }
3623 if (func_start_entry) {
3624 func_start_entry->data = true;
3625
3626 addr_t symbol_file_addr = func_start_entry->addr;
3627 uint32_t symbol_flags = 0;
3628 if (is_arm) {
3629 if (symbol_file_addr & 1)
3630 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3631 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3632 }
3633
3634 const FunctionStarts::Entry *next_func_start_entry =
3635 function_starts.FindNextEntry(func_start_entry);
3636 const addr_t section_end_file_addr =
3637 section_file_addr +
3638 symbol_section->GetByteSize();
3639 if (next_func_start_entry) {
3640 addr_t next_symbol_file_addr =
3641 next_func_start_entry->addr;
3642 // Be sure to clear the Thumb address bit when
3643 // we calculate the size from the current and
3644 // next address
3645 if (is_arm)
3646 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3647 symbol_byte_size = std::min<lldb::addr_t>(
3648 next_symbol_file_addr - symbol_file_addr,
3649 section_end_file_addr - symbol_file_addr);
3650 } else {
3651 symbol_byte_size =
3652 section_end_file_addr - symbol_file_addr;
3653 }
3654 }
3655 }
3656 symbol_value -= section_file_addr;
3657 }
3658
3659 if (is_debug == false) {
3660 if (type == eSymbolTypeCode) {
3661 // See if we can find a N_FUN entry for any code
3662 // symbols. If we do find a match, and the name
3663 // matches, then we can merge the two into just the
3664 // function symbol to avoid duplicate entries in
3665 // the symbol table
3666 auto range =
3667 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3668 if (range.first != range.second) {
3669 bool found_it = false;
3670 for (auto pos = range.first; pos != range.second;
3671 ++pos) {
3672 if (sym[sym_idx].GetMangled().GetName(
3673 Mangled::ePreferMangled) ==
3674 sym[pos->second].GetMangled().GetName(
3675 Mangled::ePreferMangled)) {
3676 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3677 // We just need the flags from the linker
3678 // symbol, so put these flags
3679 // into the N_FUN flags to avoid duplicate
3680 // symbols in the symbol table
3681 sym[pos->second].SetExternal(
3682 sym[sym_idx].IsExternal());
3683 sym[pos->second].SetFlags(nlist.n_type << 16 |
3684 nlist.n_desc);
3685 if (resolver_addresses.find(nlist.n_value) !=
3686 resolver_addresses.end())
3687 sym[pos->second].SetType(eSymbolTypeResolver);
3688 sym[sym_idx].Clear();
3689 found_it = true;
3690 break;
3691 }
3692 }
3693 if (found_it)
3694 continue;
3695 } else {
3696 if (resolver_addresses.find(nlist.n_value) !=
3697 resolver_addresses.end())
3698 type = eSymbolTypeResolver;
3699 }
3700 } else if (type == eSymbolTypeData ||
3701 type == eSymbolTypeObjCClass ||
3702 type == eSymbolTypeObjCMetaClass ||
3703 type == eSymbolTypeObjCIVar) {
3704 // See if we can find a N_STSYM entry for any data
3705 // symbols. If we do find a match, and the name
3706 // matches, then we can merge the two into just the
3707 // Static symbol to avoid duplicate entries in the
3708 // symbol table
3709 auto range = N_STSYM_addr_to_sym_idx.equal_range(
3710 nlist.n_value);
3711 if (range.first != range.second) {
3712 bool found_it = false;
3713 for (auto pos = range.first; pos != range.second;
3714 ++pos) {
3715 if (sym[sym_idx].GetMangled().GetName(
3716 Mangled::ePreferMangled) ==
3717 sym[pos->second].GetMangled().GetName(
3718 Mangled::ePreferMangled)) {
3719 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3720 // We just need the flags from the linker
3721 // symbol, so put these flags
3722 // into the N_STSYM flags to avoid duplicate
3723 // symbols in the symbol table
3724 sym[pos->second].SetExternal(
3725 sym[sym_idx].IsExternal());
3726 sym[pos->second].SetFlags(nlist.n_type << 16 |
3727 nlist.n_desc);
3728 sym[sym_idx].Clear();
3729 found_it = true;
3730 break;
3731 }
3732 }
3733 if (found_it)
3734 continue;
3735 } else {
3736 const char *gsym_name =
3737 sym[sym_idx]
3738 .GetMangled()
3739 .GetName(Mangled::ePreferMangled)
3740 .GetCString();
3741 if (gsym_name) {
3742 // Combine N_GSYM stab entries with the non
3743 // stab symbol
3744 ConstNameToSymbolIndexMap::const_iterator pos =
3745 N_GSYM_name_to_sym_idx.find(gsym_name);
3746 if (pos != N_GSYM_name_to_sym_idx.end()) {
3747 const uint32_t GSYM_sym_idx = pos->second;
3748 m_nlist_idx_to_sym_idx[nlist_idx] =
3749 GSYM_sym_idx;
3750 // Copy the address, because often the N_GSYM
3751 // address has an invalid address of zero
3752 // when the global is a common symbol
3753 sym[GSYM_sym_idx].GetAddressRef().SetSection(
3754 symbol_section);
3755 sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3756 symbol_value);
3757 add_symbol_addr(sym[GSYM_sym_idx]
3758 .GetAddress()
3759 .GetFileAddress());
3760 // We just need the flags from the linker
3761 // symbol, so put these flags
3762 // into the N_GSYM flags to avoid duplicate
3763 // symbols in the symbol table
3764 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3765 nlist.n_desc);
3766 sym[sym_idx].Clear();
3767 continue;
3768 }
3769 }
3770 }
3771 }
3772 }
3773
3774 sym[sym_idx].SetID(nlist_idx);
3775 sym[sym_idx].SetType(type);
3776 if (set_value) {
3777 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3778 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3779 add_symbol_addr(
3780 sym[sym_idx].GetAddress().GetFileAddress());
3781 }
3782 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3783
3784 if (symbol_byte_size > 0)
3785 sym[sym_idx].SetByteSize(symbol_byte_size);
3786
3787 if (demangled_is_synthesized)
3788 sym[sym_idx].SetDemangledNameIsSynthesized(true);
3789 ++sym_idx;
3790 } else {
3791 sym[sym_idx].Clear();
3792 }
3793 }
3794 /////////////////////////////
3795 }
3796 }
3797
3798 for (const auto &pos : reexport_shlib_needs_fixup) {
3799 const auto undef_pos = undefined_name_to_desc.find(pos.second);
3800 if (undef_pos != undefined_name_to_desc.end()) {
3801 const uint8_t dylib_ordinal =
3802 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3803 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3804 sym[pos.first].SetReExportedSymbolSharedLibrary(
3805 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3806 }
3807 }
3808 }
3809
3810#endif
3811 lldb::offset_t nlist_data_offset = 0;
3812
3813 if (nlist_data.GetByteSize() > 0) {
3814
3815 // If the sym array was not created while parsing the DSC unmapped
3816 // symbols, create it now.
3817 if (sym == nullptr) {
3818 sym =
3819 symtab.Resize(count: symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3820 num_syms = symtab.GetNumSymbols();
3821 }
3822
3823 if (unmapped_local_symbols_found) {
3824 assert(m_dysymtab.ilocalsym == 0);
3825 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3826 nlist_idx = m_dysymtab.nlocalsym;
3827 } else {
3828 nlist_idx = 0;
3829 }
3830
3831 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3832 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3833 UndefinedNameToDescMap undefined_name_to_desc;
3834 SymbolIndexToName reexport_shlib_needs_fixup;
3835
3836 // Symtab parsing is a huge mess. Everything is entangled and the code
3837 // requires access to a ridiculous amount of variables. LLDB depends
3838 // heavily on the proper merging of symbols and to get that right we need
3839 // to make sure we have parsed all the debug symbols first. Therefore we
3840 // invoke the lambda twice, once to parse only the debug symbols and then
3841 // once more to parse the remaining symbols.
3842 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3843 bool debug_only) {
3844 const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3845 if (is_debug != debug_only)
3846 return true;
3847
3848 const char *symbol_name_non_abi_mangled = nullptr;
3849 const char *symbol_name = nullptr;
3850
3851 if (have_strtab_data) {
3852 symbol_name = strtab_data.PeekCStr(offset: nlist.n_strx);
3853
3854 if (symbol_name == nullptr) {
3855 // No symbol should be NULL, even the symbols with no string values
3856 // should have an offset zero which points to an empty C-string
3857 Debugger::ReportError(message: llvm::formatv(
3858 Fmt: "symbol[{0}] has invalid string table offset {1:x} in {2}, "
3859 "ignoring symbol",
3860 Vals&: nlist_idx, Vals&: nlist.n_strx, Vals: module_sp->GetFileSpec().GetPath()));
3861 return true;
3862 }
3863 if (symbol_name[0] == '\0')
3864 symbol_name = nullptr;
3865 } else {
3866 const addr_t str_addr = strtab_addr + nlist.n_strx;
3867 Status str_error;
3868 if (process->ReadCStringFromMemory(vm_addr: str_addr, out_str&: memory_symbol_name,
3869 error&: str_error))
3870 symbol_name = memory_symbol_name.c_str();
3871 }
3872
3873 SymbolType type = eSymbolTypeInvalid;
3874 SectionSP symbol_section;
3875 lldb::addr_t symbol_byte_size = 0;
3876 bool add_nlist = true;
3877 bool is_gsym = false;
3878 bool demangled_is_synthesized = false;
3879 bool set_value = true;
3880
3881 assert(sym_idx < num_syms);
3882 sym[sym_idx].SetDebug(is_debug);
3883
3884 if (is_debug) {
3885 switch (nlist.n_type) {
3886 case N_GSYM:
3887 // global symbol: name,,NO_SECT,type,0
3888 // Sometimes the N_GSYM value contains the address.
3889
3890 // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3891 // the ObjC data. They
3892 // have the same address, but we want to ensure that we always find
3893 // only the real symbol, 'cause we don't currently correctly
3894 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3895 // type. This is a temporary hack to make sure the ObjectiveC
3896 // symbols get treated correctly. To do this right, we should
3897 // coalesce all the GSYM & global symbols that have the same
3898 // address.
3899 is_gsym = true;
3900 sym[sym_idx].SetExternal(true);
3901
3902 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3903 llvm::StringRef symbol_name_ref(symbol_name);
3904 if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_class)) {
3905 symbol_name_non_abi_mangled = symbol_name + 1;
3906 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3907 type = eSymbolTypeObjCClass;
3908 demangled_is_synthesized = true;
3909
3910 } else if (symbol_name_ref.starts_with(
3911 Prefix: g_objc_v2_prefix_metaclass)) {
3912 symbol_name_non_abi_mangled = symbol_name + 1;
3913 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3914 type = eSymbolTypeObjCMetaClass;
3915 demangled_is_synthesized = true;
3916 } else if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_ivar)) {
3917 symbol_name_non_abi_mangled = symbol_name + 1;
3918 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3919 type = eSymbolTypeObjCIVar;
3920 demangled_is_synthesized = true;
3921 }
3922 } else {
3923 if (nlist.n_value != 0)
3924 symbol_section =
3925 section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3926 type = eSymbolTypeData;
3927 }
3928 break;
3929
3930 case N_FNAME:
3931 // procedure name (f77 kludge): name,,NO_SECT,0,0
3932 type = eSymbolTypeCompiler;
3933 break;
3934
3935 case N_FUN:
3936 // procedure: name,,n_sect,linenumber,address
3937 if (symbol_name) {
3938 type = eSymbolTypeCode;
3939 symbol_section =
3940 section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3941
3942 N_FUN_addr_to_sym_idx.insert(
3943 x: std::make_pair(x&: nlist.n_value, y&: sym_idx));
3944 // We use the current number of symbols in the symbol table in
3945 // lieu of using nlist_idx in case we ever start trimming entries
3946 // out
3947 N_FUN_indexes.push_back(x: sym_idx);
3948 } else {
3949 type = eSymbolTypeCompiler;
3950
3951 if (!N_FUN_indexes.empty()) {
3952 // Copy the size of the function into the original STAB entry
3953 // so we don't have to hunt for it later
3954 symtab.SymbolAtIndex(idx: N_FUN_indexes.back())
3955 ->SetByteSize(nlist.n_value);
3956 N_FUN_indexes.pop_back();
3957 // We don't really need the end function STAB as it contains
3958 // the size which we already placed with the original symbol,
3959 // so don't add it if we want a minimal symbol table
3960 add_nlist = false;
3961 }
3962 }
3963 break;
3964
3965 case N_STSYM:
3966 // static symbol: name,,n_sect,type,address
3967 N_STSYM_addr_to_sym_idx.insert(
3968 x: std::make_pair(x&: nlist.n_value, y&: sym_idx));
3969 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3970 if (symbol_name && symbol_name[0]) {
3971 type = ObjectFile::GetSymbolTypeFromName(name: symbol_name + 1,
3972 symbol_type_hint: eSymbolTypeData);
3973 }
3974 break;
3975
3976 case N_LCSYM:
3977 // .lcomm symbol: name,,n_sect,type,address
3978 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
3979 type = eSymbolTypeCommonBlock;
3980 break;
3981
3982 case N_BNSYM:
3983 // We use the current number of symbols in the symbol table in lieu
3984 // of using nlist_idx in case we ever start trimming entries out
3985 // Skip these if we want minimal symbol tables
3986 add_nlist = false;
3987 break;
3988
3989 case N_ENSYM:
3990 // Set the size of the N_BNSYM to the terminating index of this
3991 // N_ENSYM so that we can always skip the entire symbol if we need
3992 // to navigate more quickly at the source level when parsing STABS
3993 // Skip these if we want minimal symbol tables
3994 add_nlist = false;
3995 break;
3996
3997 case N_OPT:
3998 // emitted with gcc2_compiled and in gcc source
3999 type = eSymbolTypeCompiler;
4000 break;
4001
4002 case N_RSYM:
4003 // register sym: name,,NO_SECT,type,register
4004 type = eSymbolTypeVariable;
4005 break;
4006
4007 case N_SLINE:
4008 // src line: 0,,n_sect,linenumber,address
4009 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4010 type = eSymbolTypeLineEntry;
4011 break;
4012
4013 case N_SSYM:
4014 // structure elt: name,,NO_SECT,type,struct_offset
4015 type = eSymbolTypeVariableType;
4016 break;
4017
4018 case N_SO:
4019 // source file name
4020 type = eSymbolTypeSourceFile;
4021 if (symbol_name == nullptr) {
4022 add_nlist = false;
4023 if (N_SO_index != UINT32_MAX) {
4024 // Set the size of the N_SO to the terminating index of this
4025 // N_SO so that we can always skip the entire N_SO if we need
4026 // to navigate more quickly at the source level when parsing
4027 // STABS
4028 symbol_ptr = symtab.SymbolAtIndex(idx: N_SO_index);
4029 symbol_ptr->SetByteSize(sym_idx);
4030 symbol_ptr->SetSizeIsSibling(true);
4031 }
4032 N_NSYM_indexes.clear();
4033 N_INCL_indexes.clear();
4034 N_BRAC_indexes.clear();
4035 N_COMM_indexes.clear();
4036 N_FUN_indexes.clear();
4037 N_SO_index = UINT32_MAX;
4038 } else {
4039 // We use the current number of symbols in the symbol table in
4040 // lieu of using nlist_idx in case we ever start trimming entries
4041 // out
4042 const bool N_SO_has_full_path = symbol_name[0] == '/';
4043 if (N_SO_has_full_path) {
4044 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
4045 // We have two consecutive N_SO entries where the first
4046 // contains a directory and the second contains a full path.
4047 sym[sym_idx - 1].GetMangled().SetValue(
4048 ConstString(symbol_name));
4049 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
4050 add_nlist = false;
4051 } else {
4052 // This is the first entry in a N_SO that contains a
4053 // directory or a full path to the source file
4054 N_SO_index = sym_idx;
4055 }
4056 } else if ((N_SO_index == sym_idx - 1) &&
4057 ((sym_idx - 1) < num_syms)) {
4058 // This is usually the second N_SO entry that contains just the
4059 // filename, so here we combine it with the first one if we are
4060 // minimizing the symbol table
4061 const char *so_path =
4062 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
4063 if (so_path && so_path[0]) {
4064 std::string full_so_path(so_path);
4065 const size_t double_slash_pos = full_so_path.find(s: "//");
4066 if (double_slash_pos != std::string::npos) {
4067 // The linker has been generating bad N_SO entries with
4068 // doubled up paths in the format "%s%s" where the first
4069 // string is the DW_AT_comp_dir, and the second is the
4070 // directory for the source file so you end up with a path
4071 // that looks like "/tmp/src//tmp/src/"
4072 FileSpec so_dir(so_path);
4073 if (!FileSystem::Instance().Exists(file_spec: so_dir)) {
4074 so_dir.SetFile(path: &full_so_path[double_slash_pos + 1],
4075 style: FileSpec::Style::native);
4076 if (FileSystem::Instance().Exists(file_spec: so_dir)) {
4077 // Trim off the incorrect path
4078 full_so_path.erase(pos: 0, n: double_slash_pos + 1);
4079 }
4080 }
4081 }
4082 if (*full_so_path.rbegin() != '/')
4083 full_so_path += '/';
4084 full_so_path += symbol_name;
4085 sym[sym_idx - 1].GetMangled().SetValue(
4086 ConstString(full_so_path.c_str()));
4087 add_nlist = false;
4088 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
4089 }
4090 } else {
4091 // This could be a relative path to a N_SO
4092 N_SO_index = sym_idx;
4093 }
4094 }
4095 break;
4096
4097 case N_OSO:
4098 // object file name: name,,0,0,st_mtime
4099 type = eSymbolTypeObjectFile;
4100 break;
4101
4102 case N_LSYM:
4103 // local sym: name,,NO_SECT,type,offset
4104 type = eSymbolTypeLocal;
4105 break;
4106
4107 // INCL scopes
4108 case N_BINCL:
4109 // include file beginning: name,,NO_SECT,0,sum We use the current
4110 // number of symbols in the symbol table in lieu of using nlist_idx
4111 // in case we ever start trimming entries out
4112 N_INCL_indexes.push_back(x: sym_idx);
4113 type = eSymbolTypeScopeBegin;
4114 break;
4115
4116 case N_EINCL:
4117 // include file end: name,,NO_SECT,0,0
4118 // Set the size of the N_BINCL to the terminating index of this
4119 // N_EINCL so that we can always skip the entire symbol if we need
4120 // to navigate more quickly at the source level when parsing STABS
4121 if (!N_INCL_indexes.empty()) {
4122 symbol_ptr = symtab.SymbolAtIndex(idx: N_INCL_indexes.back());
4123 symbol_ptr->SetByteSize(sym_idx + 1);
4124 symbol_ptr->SetSizeIsSibling(true);
4125 N_INCL_indexes.pop_back();
4126 }
4127 type = eSymbolTypeScopeEnd;
4128 break;
4129
4130 case N_SOL:
4131 // #included file name: name,,n_sect,0,address
4132 type = eSymbolTypeHeaderFile;
4133
4134 // We currently don't use the header files on darwin
4135 add_nlist = false;
4136 break;
4137
4138 case N_PARAMS:
4139 // compiler parameters: name,,NO_SECT,0,0
4140 type = eSymbolTypeCompiler;
4141 break;
4142
4143 case N_VERSION:
4144 // compiler version: name,,NO_SECT,0,0
4145 type = eSymbolTypeCompiler;
4146 break;
4147
4148 case N_OLEVEL:
4149 // compiler -O level: name,,NO_SECT,0,0
4150 type = eSymbolTypeCompiler;
4151 break;
4152
4153 case N_PSYM:
4154 // parameter: name,,NO_SECT,type,offset
4155 type = eSymbolTypeVariable;
4156 break;
4157
4158 case N_ENTRY:
4159 // alternate entry: name,,n_sect,linenumber,address
4160 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4161 type = eSymbolTypeLineEntry;
4162 break;
4163
4164 // Left and Right Braces
4165 case N_LBRAC:
4166 // left bracket: 0,,NO_SECT,nesting level,address We use the
4167 // current number of symbols in the symbol table in lieu of using
4168 // nlist_idx in case we ever start trimming entries out
4169 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4170 N_BRAC_indexes.push_back(x: sym_idx);
4171 type = eSymbolTypeScopeBegin;
4172 break;
4173
4174 case N_RBRAC:
4175 // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4176 // the N_LBRAC to the terminating index of this N_RBRAC so that we
4177 // can always skip the entire symbol if we need to navigate more
4178 // quickly at the source level when parsing STABS
4179 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4180 if (!N_BRAC_indexes.empty()) {
4181 symbol_ptr = symtab.SymbolAtIndex(idx: N_BRAC_indexes.back());
4182 symbol_ptr->SetByteSize(sym_idx + 1);
4183 symbol_ptr->SetSizeIsSibling(true);
4184 N_BRAC_indexes.pop_back();
4185 }
4186 type = eSymbolTypeScopeEnd;
4187 break;
4188
4189 case N_EXCL:
4190 // deleted include file: name,,NO_SECT,0,sum
4191 type = eSymbolTypeHeaderFile;
4192 break;
4193
4194 // COMM scopes
4195 case N_BCOMM:
4196 // begin common: name,,NO_SECT,0,0
4197 // We use the current number of symbols in the symbol table in lieu
4198 // of using nlist_idx in case we ever start trimming entries out
4199 type = eSymbolTypeScopeBegin;
4200 N_COMM_indexes.push_back(x: sym_idx);
4201 break;
4202
4203 case N_ECOML:
4204 // end common (local name): 0,,n_sect,0,address
4205 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4206 [[fallthrough]];
4207
4208 case N_ECOMM:
4209 // end common: name,,n_sect,0,0
4210 // Set the size of the N_BCOMM to the terminating index of this
4211 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4212 // we need to navigate more quickly at the source level when
4213 // parsing STABS
4214 if (!N_COMM_indexes.empty()) {
4215 symbol_ptr = symtab.SymbolAtIndex(idx: N_COMM_indexes.back());
4216 symbol_ptr->SetByteSize(sym_idx + 1);
4217 symbol_ptr->SetSizeIsSibling(true);
4218 N_COMM_indexes.pop_back();
4219 }
4220 type = eSymbolTypeScopeEnd;
4221 break;
4222
4223 case N_LENG:
4224 // second stab entry with length information
4225 type = eSymbolTypeAdditional;
4226 break;
4227
4228 default:
4229 break;
4230 }
4231 } else {
4232 uint8_t n_type = N_TYPE & nlist.n_type;
4233 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4234
4235 switch (n_type) {
4236 case N_INDR: {
4237 const char *reexport_name_cstr = strtab_data.PeekCStr(offset: nlist.n_value);
4238 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) {
4239 type = eSymbolTypeReExported;
4240 ConstString reexport_name(reexport_name_cstr +
4241 ((reexport_name_cstr[0] == '_') ? 1 : 0));
4242 sym[sym_idx].SetReExportedSymbolName(reexport_name);
4243 set_value = false;
4244 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4245 indirect_symbol_names.insert(
4246 x: ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4247 } else
4248 type = eSymbolTypeUndefined;
4249 } break;
4250
4251 case N_UNDF:
4252 if (symbol_name && symbol_name[0]) {
4253 ConstString undefined_name(symbol_name +
4254 ((symbol_name[0] == '_') ? 1 : 0));
4255 undefined_name_to_desc[undefined_name] = nlist.n_desc;
4256 }
4257 [[fallthrough]];
4258
4259 case N_PBUD:
4260 type = eSymbolTypeUndefined;
4261 break;
4262
4263 case N_ABS:
4264 type = eSymbolTypeAbsolute;
4265 break;
4266
4267 case N_SECT: {
4268 symbol_section = section_info.GetSection(n_sect: nlist.n_sect, file_addr: nlist.n_value);
4269
4270 if (!symbol_section) {
4271 // TODO: warn about this?
4272 add_nlist = false;
4273 break;
4274 }
4275
4276 if (TEXT_eh_frame_sectID == nlist.n_sect) {
4277 type = eSymbolTypeException;
4278 } else {
4279 uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4280
4281 switch (section_type) {
4282 case S_CSTRING_LITERALS:
4283 type = eSymbolTypeData;
4284 break; // section with only literal C strings
4285 case S_4BYTE_LITERALS:
4286 type = eSymbolTypeData;
4287 break; // section with only 4 byte literals
4288 case S_8BYTE_LITERALS:
4289 type = eSymbolTypeData;
4290 break; // section with only 8 byte literals
4291 case S_LITERAL_POINTERS:
4292 type = eSymbolTypeTrampoline;
4293 break; // section with only pointers to literals
4294 case S_NON_LAZY_SYMBOL_POINTERS:
4295 type = eSymbolTypeTrampoline;
4296 break; // section with only non-lazy symbol pointers
4297 case S_LAZY_SYMBOL_POINTERS:
4298 type = eSymbolTypeTrampoline;
4299 break; // section with only lazy symbol pointers
4300 case S_SYMBOL_STUBS:
4301 type = eSymbolTypeTrampoline;
4302 break; // section with only symbol stubs, byte size of stub in
4303 // the reserved2 field
4304 case S_MOD_INIT_FUNC_POINTERS:
4305 type = eSymbolTypeCode;
4306 break; // section with only function pointers for initialization
4307 case S_MOD_TERM_FUNC_POINTERS:
4308 type = eSymbolTypeCode;
4309 break; // section with only function pointers for termination
4310 case S_INTERPOSING:
4311 type = eSymbolTypeTrampoline;
4312 break; // section with only pairs of function pointers for
4313 // interposing
4314 case S_16BYTE_LITERALS:
4315 type = eSymbolTypeData;
4316 break; // section with only 16 byte literals
4317 case S_DTRACE_DOF:
4318 type = eSymbolTypeInstrumentation;
4319 break;
4320 case S_LAZY_DYLIB_SYMBOL_POINTERS:
4321 type = eSymbolTypeTrampoline;
4322 break;
4323 default:
4324 switch (symbol_section->GetType()) {
4325 case lldb::eSectionTypeCode:
4326 type = eSymbolTypeCode;
4327 break;
4328 case eSectionTypeData:
4329 case eSectionTypeDataCString: // Inlined C string data
4330 case eSectionTypeDataCStringPointers: // Pointers to C string
4331 // data
4332 case eSectionTypeDataSymbolAddress: // Address of a symbol in
4333 // the symbol table
4334 case eSectionTypeData4:
4335 case eSectionTypeData8:
4336 case eSectionTypeData16:
4337 type = eSymbolTypeData;
4338 break;
4339 default:
4340 break;
4341 }
4342 break;
4343 }
4344
4345 if (type == eSymbolTypeInvalid) {
4346 const char *symbol_sect_name =
4347 symbol_section->GetName().AsCString();
4348 if (symbol_section->IsDescendant(section: text_section_sp.get())) {
4349 if (symbol_section->IsClear(bit: S_ATTR_PURE_INSTRUCTIONS |
4350 S_ATTR_SELF_MODIFYING_CODE |
4351 S_ATTR_SOME_INSTRUCTIONS))
4352 type = eSymbolTypeData;
4353 else
4354 type = eSymbolTypeCode;
4355 } else if (symbol_section->IsDescendant(section: data_section_sp.get()) ||
4356 symbol_section->IsDescendant(
4357 section: data_dirty_section_sp.get()) ||
4358 symbol_section->IsDescendant(
4359 section: data_const_section_sp.get())) {
4360 if (symbol_sect_name &&
4361 ::strstr(haystack: symbol_sect_name, needle: "__objc") == symbol_sect_name) {
4362 type = eSymbolTypeRuntime;
4363
4364 if (symbol_name) {
4365 llvm::StringRef symbol_name_ref(symbol_name);
4366 if (symbol_name_ref.starts_with(Prefix: "_OBJC_")) {
4367 llvm::StringRef g_objc_v2_prefix_class(
4368 "_OBJC_CLASS_$_");
4369 llvm::StringRef g_objc_v2_prefix_metaclass(
4370 "_OBJC_METACLASS_$_");
4371 llvm::StringRef g_objc_v2_prefix_ivar(
4372 "_OBJC_IVAR_$_");
4373 if (symbol_name_ref.starts_with(Prefix: g_objc_v2_prefix_class)) {
4374 symbol_name_non_abi_mangled = symbol_name + 1;
4375 symbol_name =
4376 symbol_name + g_objc_v2_prefix_class.size();
4377 type = eSymbolTypeObjCClass;
4378 demangled_is_synthesized = true;
4379 } else if (symbol_name_ref.starts_with(
4380 Prefix: g_objc_v2_prefix_metaclass)) {
4381 symbol_name_non_abi_mangled = symbol_name + 1;
4382 symbol_name =
4383 symbol_name + g_objc_v2_prefix_metaclass.size();
4384 type = eSymbolTypeObjCMetaClass;
4385 demangled_is_synthesized = true;
4386 } else if (symbol_name_ref.starts_with(
4387 Prefix: g_objc_v2_prefix_ivar)) {
4388 symbol_name_non_abi_mangled = symbol_name + 1;
4389 symbol_name =
4390 symbol_name + g_objc_v2_prefix_ivar.size();
4391 type = eSymbolTypeObjCIVar;
4392 demangled_is_synthesized = true;
4393 }
4394 }
4395 }
4396 } else if (symbol_sect_name &&
4397 ::strstr(haystack: symbol_sect_name, needle: "__gcc_except_tab") ==
4398 symbol_sect_name) {
4399 type = eSymbolTypeException;
4400 } else {
4401 type = eSymbolTypeData;
4402 }
4403 } else if (symbol_sect_name &&
4404 ::strstr(haystack: symbol_sect_name, needle: "__IMPORT") ==
4405 symbol_sect_name) {
4406 type = eSymbolTypeTrampoline;
4407 } else if (symbol_section->IsDescendant(section: objc_section_sp.get())) {
4408 type = eSymbolTypeRuntime;
4409 if (symbol_name && symbol_name[0] == '.') {
4410 llvm::StringRef symbol_name_ref(symbol_name);
4411 llvm::StringRef g_objc_v1_prefix_class(
4412 ".objc_class_name_");
4413 if (symbol_name_ref.starts_with(Prefix: g_objc_v1_prefix_class)) {
4414 symbol_name_non_abi_mangled = symbol_name;
4415 symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4416 type = eSymbolTypeObjCClass;
4417 demangled_is_synthesized = true;
4418 }
4419 }
4420 }
4421 }
4422 }
4423 } break;
4424 }
4425 }
4426
4427 if (!add_nlist) {
4428 sym[sym_idx].Clear();
4429 return true;
4430 }
4431
4432 uint64_t symbol_value = nlist.n_value;
4433
4434 if (symbol_name_non_abi_mangled) {
4435 sym[sym_idx].GetMangled().SetMangledName(
4436 ConstString(symbol_name_non_abi_mangled));
4437 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4438 } else {
4439
4440 if (symbol_name && symbol_name[0] == '_') {
4441 symbol_name++; // Skip the leading underscore
4442 }
4443
4444 if (symbol_name) {
4445 ConstString const_symbol_name(symbol_name);
4446 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
4447 }
4448 }
4449
4450 if (is_gsym) {
4451 const char *gsym_name = sym[sym_idx]
4452 .GetMangled()
4453 .GetName(preference: Mangled::ePreferMangled)
4454 .GetCString();
4455 if (gsym_name)
4456 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4457 }
4458
4459 if (symbol_section) {
4460 const addr_t section_file_addr = symbol_section->GetFileAddress();
4461 if (symbol_byte_size == 0 && function_starts_count > 0) {
4462 addr_t symbol_lookup_file_addr = nlist.n_value;
4463 // Do an exact address match for non-ARM addresses, else get the
4464 // closest since the symbol might be a thumb symbol which has an
4465 // address with bit zero set.
4466 FunctionStarts::Entry *func_start_entry =
4467 function_starts.FindEntry(addr: symbol_lookup_file_addr, exact_match_only: !is_arm);
4468 if (is_arm && func_start_entry) {
4469 // Verify that the function start address is the symbol address
4470 // (ARM) or the symbol address + 1 (thumb).
4471 if (func_start_entry->addr != symbol_lookup_file_addr &&
4472 func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4473 // Not the right entry, NULL it out...
4474 func_start_entry = nullptr;
4475 }
4476 }
4477 if (func_start_entry) {
4478 func_start_entry->data = true;
4479
4480 addr_t symbol_file_addr = func_start_entry->addr;
4481 if (is_arm)
4482 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4483
4484 const FunctionStarts::Entry *next_func_start_entry =
4485 function_starts.FindNextEntry(entry: func_start_entry);
4486 const addr_t section_end_file_addr =
4487 section_file_addr + symbol_section->GetByteSize();
4488 if (next_func_start_entry) {
4489 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4490 // Be sure the clear the Thumb address bit when we calculate the
4491 // size from the current and next address
4492 if (is_arm)
4493 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4494 symbol_byte_size = std::min<lldb::addr_t>(
4495 a: next_symbol_file_addr - symbol_file_addr,
4496 b: section_end_file_addr - symbol_file_addr);
4497 } else {
4498 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4499 }
4500 }
4501 }
4502 symbol_value -= section_file_addr;
4503 }
4504
4505 if (!is_debug) {
4506 if (type == eSymbolTypeCode) {
4507 // See if we can find a N_FUN entry for any code symbols. If we do
4508 // find a match, and the name matches, then we can merge the two into
4509 // just the function symbol to avoid duplicate entries in the symbol
4510 // table.
4511 std::pair<ValueToSymbolIndexMap::const_iterator,
4512 ValueToSymbolIndexMap::const_iterator>
4513 range;
4514 range = N_FUN_addr_to_sym_idx.equal_range(x: nlist.n_value);
4515 if (range.first != range.second) {
4516 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4517 pos != range.second; ++pos) {
4518 if (sym[sym_idx].GetMangled().GetName(preference: Mangled::ePreferMangled) ==
4519 sym[pos->second].GetMangled().GetName(
4520 preference: Mangled::ePreferMangled)) {
4521 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4522 // We just need the flags from the linker symbol, so put these
4523 // flags into the N_FUN flags to avoid duplicate symbols in the
4524 // symbol table.
4525 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4526 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4527 if (resolver_addresses.find(x: nlist.n_value) !=
4528 resolver_addresses.end())
4529 sym[pos->second].SetType(eSymbolTypeResolver);
4530 sym[sym_idx].Clear();
4531 return true;
4532 }
4533 }
4534 } else {
4535 if (resolver_addresses.find(x: nlist.n_value) !=
4536 resolver_addresses.end())
4537 type = eSymbolTypeResolver;
4538 }
4539 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4540 type == eSymbolTypeObjCMetaClass ||
4541 type == eSymbolTypeObjCIVar) {
4542 // See if we can find a N_STSYM entry for any data symbols. If we do
4543 // find a match, and the name matches, then we can merge the two into
4544 // just the Static symbol to avoid duplicate entries in the symbol
4545 // table.
4546 std::pair<ValueToSymbolIndexMap::const_iterator,
4547 ValueToSymbolIndexMap::const_iterator>
4548 range;
4549 range = N_STSYM_addr_to_sym_idx.equal_range(x: nlist.n_value);
4550 if (range.first != range.second) {
4551 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4552 pos != range.second; ++pos) {
4553 if (sym[sym_idx].GetMangled().GetName(preference: Mangled::ePreferMangled) ==
4554 sym[pos->second].GetMangled().GetName(
4555 preference: Mangled::ePreferMangled)) {
4556 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4557 // We just need the flags from the linker symbol, so put these
4558 // flags into the N_STSYM flags to avoid duplicate symbols in
4559 // the symbol table.
4560 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4561 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4562 sym[sym_idx].Clear();
4563 return true;
4564 }
4565 }
4566 } else {
4567 // Combine N_GSYM stab entries with the non stab symbol.
4568 const char *gsym_name = sym[sym_idx]
4569 .GetMangled()
4570 .GetName(preference: Mangled::ePreferMangled)
4571 .GetCString();
4572 if (gsym_name) {
4573 ConstNameToSymbolIndexMap::const_iterator pos =
4574 N_GSYM_name_to_sym_idx.find(Val: gsym_name);
4575 if (pos != N_GSYM_name_to_sym_idx.end()) {
4576 const uint32_t GSYM_sym_idx = pos->second;
4577 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4578 // Copy the address, because often the N_GSYM address has an
4579 // invalid address of zero when the global is a common symbol.
4580 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4581 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4582 add_symbol_addr(
4583 sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4584 // We just need the flags from the linker symbol, so put these
4585 // flags into the N_GSYM flags to avoid duplicate symbols in
4586 // the symbol table.
4587 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4588 sym[sym_idx].Clear();
4589 return true;
4590 }
4591 }
4592 }
4593 }
4594 }
4595
4596 sym[sym_idx].SetID(nlist_idx);
4597 sym[sym_idx].SetType(type);
4598 if (set_value) {
4599 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4600 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4601 if (symbol_section)
4602 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4603 }
4604 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4605 if (nlist.n_desc & N_WEAK_REF)
4606 sym[sym_idx].SetIsWeak(true);
4607
4608 if (symbol_byte_size > 0)
4609 sym[sym_idx].SetByteSize(symbol_byte_size);
4610
4611 if (demangled_is_synthesized)
4612 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4613
4614 ++sym_idx;
4615 return true;
4616 };
4617
4618 // First parse all the nlists but don't process them yet. See the next
4619 // comment for an explanation why.
4620 std::vector<struct nlist_64> nlists;
4621 nlists.reserve(n: symtab_load_command.nsyms);
4622 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4623 if (auto nlist =
4624 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4625 nlists.push_back(x: *nlist);
4626 else
4627 break;
4628 }
4629
4630 // Now parse all the debug symbols. This is needed to merge non-debug
4631 // symbols in the next step. Non-debug symbols are always coalesced into
4632 // the debug symbol. Doing this in one step would mean that some symbols
4633 // won't be merged.
4634 nlist_idx = 0;
4635 for (auto &nlist : nlists) {
4636 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4637 break;
4638 }
4639
4640 // Finally parse all the non debug symbols.
4641 nlist_idx = 0;
4642 for (auto &nlist : nlists) {
4643 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4644 break;
4645 }
4646
4647 for (const auto &pos : reexport_shlib_needs_fixup) {
4648 const auto undef_pos = undefined_name_to_desc.find(Val: pos.second);
4649 if (undef_pos != undefined_name_to_desc.end()) {
4650 const uint8_t dylib_ordinal =
4651 llvm::MachO::GET_LIBRARY_ORDINAL(n_desc: undef_pos->second);
4652 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4653 sym[pos.first].SetReExportedSymbolSharedLibrary(
4654 dylib_files.GetFileSpecAtIndex(idx: dylib_ordinal - 1));
4655 }
4656 }
4657 }
4658
4659 // Count how many trie symbols we'll add to the symbol table
4660 int trie_symbol_table_augment_count = 0;
4661 for (auto &e : external_sym_trie_entries) {
4662 if (!symbols_added.contains(V: e.entry.address))
4663 trie_symbol_table_augment_count++;
4664 }
4665
4666 if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4667 num_syms = sym_idx + trie_symbol_table_augment_count;
4668 sym = symtab.Resize(count: num_syms);
4669 }
4670 uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4671
4672 // Add symbols from the trie to the symbol table.
4673 for (auto &e : external_sym_trie_entries) {
4674 if (symbols_added.contains(V: e.entry.address))
4675 continue;
4676
4677 // Find the section that this trie address is in, use that to annotate
4678 // symbol type as we add the trie address and name to the symbol table.
4679 Address symbol_addr;
4680 if (module_sp->ResolveFileAddress(vm_addr: e.entry.address, so_addr&: symbol_addr)) {
4681 SectionSP symbol_section(symbol_addr.GetSection());
4682 const char *symbol_name = e.entry.name.GetCString();
4683 bool demangled_is_synthesized = false;
4684 SymbolType type =
4685 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4686 data_section_sp, data_dirty_section_sp,
4687 data_const_section_sp, symbol_section);
4688
4689 sym[sym_idx].SetType(type);
4690 if (symbol_section) {
4691 sym[sym_idx].SetID(synthetic_sym_id++);
4692 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4693 if (demangled_is_synthesized)
4694 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4695 sym[sym_idx].SetIsSynthetic(true);
4696 sym[sym_idx].SetExternal(true);
4697 sym[sym_idx].GetAddressRef() = symbol_addr;
4698 add_symbol_addr(symbol_addr.GetFileAddress());
4699 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4700 sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB);
4701 ++sym_idx;
4702 }
4703 }
4704 }
4705
4706 if (function_starts_count > 0) {
4707 uint32_t num_synthetic_function_symbols = 0;
4708 for (i = 0; i < function_starts_count; ++i) {
4709 if (!symbols_added.contains(V: function_starts.GetEntryRef(i).addr))
4710 ++num_synthetic_function_symbols;
4711 }
4712
4713 if (num_synthetic_function_symbols > 0) {
4714 if (num_syms < sym_idx + num_synthetic_function_symbols) {
4715 num_syms = sym_idx + num_synthetic_function_symbols;
4716 sym = symtab.Resize(count: num_syms);
4717 }
4718 for (i = 0; i < function_starts_count; ++i) {
4719 const FunctionStarts::Entry *func_start_entry =
4720 function_starts.GetEntryAtIndex(i);
4721 if (!symbols_added.contains(V: func_start_entry->addr)) {
4722 addr_t symbol_file_addr = func_start_entry->addr;
4723 uint32_t symbol_flags = 0;
4724 if (func_start_entry->data)
4725 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4726 Address symbol_addr;
4727 if (module_sp->ResolveFileAddress(vm_addr: symbol_file_addr, so_addr&: symbol_addr)) {
4728 SectionSP symbol_section(symbol_addr.GetSection());
4729 uint32_t symbol_byte_size = 0;
4730 if (symbol_section) {
4731 const addr_t section_file_addr = symbol_section->GetFileAddress();
4732 const FunctionStarts::Entry *next_func_start_entry =
4733 function_starts.FindNextEntry(entry: func_start_entry);
4734 const addr_t section_end_file_addr =
4735 section_file_addr + symbol_section->GetByteSize();
4736 if (next_func_start_entry) {
4737 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4738 if (is_arm)
4739 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4740 symbol_byte_size = std::min<lldb::addr_t>(
4741 a: next_symbol_file_addr - symbol_file_addr,
4742 b: section_end_file_addr - symbol_file_addr);
4743 } else {
4744 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4745 }
4746 sym[sym_idx].SetID(synthetic_sym_id++);
4747 // Don't set the name for any synthetic symbols, the Symbol
4748 // object will generate one if needed when the name is accessed
4749 // via accessors.
4750 sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4751 sym[sym_idx].SetType(eSymbolTypeCode);
4752 sym[sym_idx].SetIsSynthetic(true);
4753 sym[sym_idx].GetAddressRef() = symbol_addr;
4754 add_symbol_addr(symbol_addr.GetFileAddress());
4755 if (symbol_flags)
4756 sym[sym_idx].SetFlags(symbol_flags);
4757 if (symbol_byte_size)
4758 sym[sym_idx].SetByteSize(symbol_byte_size);
4759 ++sym_idx;
4760 }
4761 }
4762 }
4763 }
4764 }
4765 }
4766
4767 // Trim our symbols down to just what we ended up with after removing any
4768 // symbols.
4769 if (sym_idx < num_syms) {
4770 num_syms = sym_idx;
4771 sym = symtab.Resize(count: num_syms);
4772 }
4773
4774 // Now synthesize indirect symbols
4775 if (m_dysymtab.nindirectsyms != 0) {
4776 if (indirect_symbol_index_data.GetByteSize()) {
4777 NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4778 m_nlist_idx_to_sym_idx.end();
4779
4780 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4781 ++sect_idx) {
4782 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4783 S_SYMBOL_STUBS) {
4784 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4785 if (symbol_stub_byte_size == 0)
4786 continue;
4787
4788 const uint32_t num_symbol_stubs =
4789 m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4790
4791 if (num_symbol_stubs == 0)
4792 continue;
4793
4794 const uint32_t symbol_stub_index_offset =
4795 m_mach_sections[sect_idx].reserved1;
4796 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4797 const uint32_t symbol_stub_index =
4798 symbol_stub_index_offset + stub_idx;
4799 const lldb::addr_t symbol_stub_addr =
4800 m_mach_sections[sect_idx].addr +
4801 (stub_idx * symbol_stub_byte_size);
4802 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4803 if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4804 offset: symbol_stub_offset, length: 4)) {
4805 const uint32_t stub_sym_id =
4806 indirect_symbol_index_data.GetU32(offset_ptr: &symbol_stub_offset);
4807 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4808 continue;
4809
4810 NListIndexToSymbolIndexMap::const_iterator index_pos =
4811 m_nlist_idx_to_sym_idx.find(Val: stub_sym_id);
4812 Symbol *stub_symbol = nullptr;
4813 if (index_pos != end_index_pos) {
4814 // We have a remapping from the original nlist index to a
4815 // current symbol index, so just look this up by index
4816 stub_symbol = symtab.SymbolAtIndex(idx: index_pos->second);
4817 } else {
4818 // We need to lookup a symbol using the original nlist symbol
4819 // index since this index is coming from the S_SYMBOL_STUBS
4820 stub_symbol = symtab.FindSymbolByID(uid: stub_sym_id);
4821 }
4822
4823 if (stub_symbol) {
4824 Address so_addr(symbol_stub_addr, section_list);
4825
4826 if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4827 // Change the external symbol into a trampoline that makes
4828 // sense These symbols were N_UNDF N_EXT, and are useless
4829 // to us, so we can re-use them so we don't have to make up
4830 // a synthetic symbol for no good reason.
4831 if (resolver_addresses.find(x: symbol_stub_addr) ==
4832 resolver_addresses.end())
4833 stub_symbol->SetType(eSymbolTypeTrampoline);
4834 else
4835 stub_symbol->SetType(eSymbolTypeResolver);
4836 stub_symbol->SetExternal(false);
4837 stub_symbol->GetAddressRef() = so_addr;
4838 stub_symbol->SetByteSize(symbol_stub_byte_size);
4839 } else {
4840 // Make a synthetic symbol to describe the trampoline stub
4841 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4842 if (sym_idx >= num_syms) {
4843 sym = symtab.Resize(count: ++num_syms);
4844 stub_symbol = nullptr; // this pointer no longer valid
4845 }
4846 sym[sym_idx].SetID(synthetic_sym_id++);
4847 sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4848 if (resolver_addresses.find(x: symbol_stub_addr) ==
4849 resolver_addresses.end())
4850 sym[sym_idx].SetType(eSymbolTypeTrampoline);
4851 else
4852 sym[sym_idx].SetType(eSymbolTypeResolver);
4853 sym[sym_idx].SetIsSynthetic(true);
4854 sym[sym_idx].GetAddressRef() = so_addr;
4855 add_symbol_addr(so_addr.GetFileAddress());
4856 sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4857 ++sym_idx;
4858 }
4859 } else {
4860 if (log)
4861 log->Warning(fmt: "symbol stub referencing symbol table symbol "
4862 "%u that isn't in our minimal symbol table, "
4863 "fix this!!!",
4864 stub_sym_id);
4865 }
4866 }
4867 }
4868 }
4869 }
4870 }
4871 }
4872
4873 if (!reexport_trie_entries.empty()) {
4874 for (const auto &e : reexport_trie_entries) {
4875 if (e.entry.import_name) {
4876 // Only add indirect symbols from the Trie entries if we didn't have
4877 // a N_INDR nlist entry for this already
4878 if (indirect_symbol_names.find(x: e.entry.name) ==
4879 indirect_symbol_names.end()) {
4880 // Make a synthetic symbol to describe re-exported symbol.
4881 if (sym_idx >= num_syms)
4882 sym = symtab.Resize(count: ++num_syms);
4883 sym[sym_idx].SetID(synthetic_sym_id++);
4884 sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4885 sym[sym_idx].SetType(eSymbolTypeReExported);
4886 sym[sym_idx].SetIsSynthetic(true);
4887 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4888 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4889 sym[sym_idx].SetReExportedSymbolSharedLibrary(
4890 dylib_files.GetFileSpecAtIndex(idx: e.entry.other - 1));
4891 }
4892 ++sym_idx;
4893 }
4894 }
4895 }
4896 }
4897}
4898
4899void ObjectFileMachO::Dump(Stream *s) {
4900 ModuleSP module_sp(GetModule());
4901 if (module_sp) {
4902 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4903 s->Printf(format: "%p: ", static_cast<void *>(this));
4904 s->Indent();
4905 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4906 s->PutCString(cstr: "ObjectFileMachO64");
4907 else
4908 s->PutCString(cstr: "ObjectFileMachO32");
4909
4910 *s << ", file = '" << m_file;
4911 ModuleSpecList all_specs;
4912 ModuleSpec base_spec;
4913 GetAllArchSpecs(header: m_header, data: m_data, lc_offset: MachHeaderSizeFromMagic(magic: m_header.magic),
4914 base_spec, all_specs);
4915 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4916 *s << "', triple";
4917 if (e)
4918 s->Printf(format: "[%d]", i);
4919 *s << " = ";
4920 *s << all_specs.GetModuleSpecRefAtIndex(i)
4921 .GetArchitecture()
4922 .GetTriple()
4923 .getTriple();
4924 }
4925 *s << "\n";
4926 SectionList *sections = GetSectionList();
4927 if (sections)
4928 sections->Dump(s&: s->AsRawOstream(), indent: s->GetIndentLevel(), target: nullptr, show_header: true,
4929 UINT32_MAX);
4930
4931 if (m_symtab_up)
4932 m_symtab_up->Dump(s, target: nullptr, sort_type: eSortOrderNone);
4933 }
4934}
4935
4936UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4937 const lldb_private::DataExtractor &data,
4938 lldb::offset_t lc_offset) {
4939 uint32_t i;
4940 llvm::MachO::uuid_command load_cmd;
4941
4942 lldb::offset_t offset = lc_offset;
4943 for (i = 0; i < header.ncmds; ++i) {
4944 const lldb::offset_t cmd_offset = offset;
4945 if (data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
4946 break;
4947
4948 if (load_cmd.cmd == LC_UUID) {
4949 const uint8_t *uuid_bytes = data.PeekData(offset, length: 16);
4950
4951 if (uuid_bytes) {
4952 // OpenCL on Mac OS X uses the same UUID for each of its object files.
4953 // We pretend these object files have no UUID to prevent crashing.
4954
4955 const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4956 0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4957 0xbb, 0x14, 0xf0, 0x0d};
4958
4959 if (!memcmp(s1: uuid_bytes, s2: opencl_uuid, n: 16))
4960 return UUID();
4961
4962 return UUID(uuid_bytes, 16);
4963 }
4964 return UUID();
4965 }
4966 offset = cmd_offset + load_cmd.cmdsize;
4967 }
4968 return UUID();
4969}
4970
4971static llvm::StringRef GetOSName(uint32_t cmd) {
4972 switch (cmd) {
4973 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4974 return llvm::Triple::getOSTypeName(Kind: llvm::Triple::IOS);
4975 case llvm::MachO::LC_VERSION_MIN_MACOSX:
4976 return llvm::Triple::getOSTypeName(Kind: llvm::Triple::MacOSX);
4977 case llvm::MachO::LC_VERSION_MIN_TVOS:
4978 return llvm::Triple::getOSTypeName(Kind: llvm::Triple::TvOS);
4979 case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4980 return llvm::Triple::getOSTypeName(Kind: llvm::Triple::WatchOS);
4981 default:
4982 llvm_unreachable("unexpected LC_VERSION load command");
4983 }
4984}
4985
4986namespace {
4987struct OSEnv {
4988 llvm::StringRef os_type;
4989 llvm::StringRef environment;
4990 OSEnv(uint32_t cmd) {
4991 switch (cmd) {
4992 case llvm::MachO::PLATFORM_MACOS:
4993 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::MacOSX);
4994 return;
4995 case llvm::MachO::PLATFORM_IOS:
4996 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::IOS);
4997 return;
4998 case llvm::MachO::PLATFORM_TVOS:
4999 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::TvOS);
5000 return;
5001 case llvm::MachO::PLATFORM_WATCHOS:
5002 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::WatchOS);
5003 return;
5004 case llvm::MachO::PLATFORM_BRIDGEOS:
5005 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::BridgeOS);
5006 return;
5007 case llvm::MachO::PLATFORM_DRIVERKIT:
5008 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::DriverKit);
5009 return;
5010 case llvm::MachO::PLATFORM_MACCATALYST:
5011 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::IOS);
5012 environment = llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::MacABI);
5013 return;
5014 case llvm::MachO::PLATFORM_IOSSIMULATOR:
5015 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::IOS);
5016 environment =
5017 llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::Simulator);
5018 return;
5019 case llvm::MachO::PLATFORM_TVOSSIMULATOR:
5020 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::TvOS);
5021 environment =
5022 llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::Simulator);
5023 return;
5024 case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
5025 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::WatchOS);
5026 environment =
5027 llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::Simulator);
5028 return;
5029 case llvm::MachO::PLATFORM_XROS:
5030 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::XROS);
5031 return;
5032 case llvm::MachO::PLATFORM_XROS_SIMULATOR:
5033 os_type = llvm::Triple::getOSTypeName(Kind: llvm::Triple::XROS);
5034 environment =
5035 llvm::Triple::getEnvironmentTypeName(Kind: llvm::Triple::Simulator);
5036 return;
5037 default: {
5038 Log *log(GetLog(mask: LLDBLog::Symbols | LLDBLog::Process));
5039 LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
5040 }
5041 }
5042 }
5043};
5044
// Splits a packed 32-bit version word into its parts: the upper 16 bits are
// the major version, the next 8 bits the minor version, and the low 8 bits
// the patch version.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;

  MinOS(uint32_t version)
      : major_version((version & 0xffff0000u) >> 16),
        minor_version((version & 0x0000ff00u) >> 8),
        patch_version(version & 0x000000ffu) {}
};
5051} // namespace
5052
5053void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
5054 const lldb_private::DataExtractor &data,
5055 lldb::offset_t lc_offset,
5056 ModuleSpec &base_spec,
5057 lldb_private::ModuleSpecList &all_specs) {
5058 auto &base_arch = base_spec.GetArchitecture();
5059 base_arch.SetArchitecture(arch_type: eArchTypeMachO, cpu: header.cputype, sub: header.cpusubtype);
5060 if (!base_arch.IsValid())
5061 return;
5062
5063 bool found_any = false;
5064 auto add_triple = [&](const llvm::Triple &triple) {
5065 auto spec = base_spec;
5066 spec.GetArchitecture().GetTriple() = triple;
5067 if (spec.GetArchitecture().IsValid()) {
5068 spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
5069 all_specs.Append(spec);
5070 found_any = true;
5071 }
5072 };
5073
5074 // Set OS to an unspecified unknown or a "*" so it can match any OS
5075 llvm::Triple base_triple = base_arch.GetTriple();
5076 base_triple.setOS(llvm::Triple::UnknownOS);
5077 base_triple.setOSName(llvm::StringRef());
5078
5079 if (header.filetype == MH_PRELOAD) {
5080 if (header.cputype == CPU_TYPE_ARM) {
5081 // If this is a 32-bit arm binary, and it's a standalone binary, force
5082 // the Vendor to Apple so we don't accidentally pick up the generic
5083 // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
5084 // frame pointer register; most other armv7 ABIs use a combination of
5085 // r7 and r11.
5086 base_triple.setVendor(llvm::Triple::Apple);
5087 } else {
5088 // Set vendor to an unspecified unknown or a "*" so it can match any
5089 // vendor This is required for correct behavior of EFI debugging on
5090 // x86_64
5091 base_triple.setVendor(llvm::Triple::UnknownVendor);
5092 base_triple.setVendorName(llvm::StringRef());
5093 }
5094 return add_triple(base_triple);
5095 }
5096
5097 llvm::MachO::load_command load_cmd;
5098
5099 // See if there is an LC_VERSION_MIN_* load command that can give
5100 // us the OS type.
5101 lldb::offset_t offset = lc_offset;
5102 for (uint32_t i = 0; i < header.ncmds; ++i) {
5103 const lldb::offset_t cmd_offset = offset;
5104 if (data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
5105 break;
5106
5107 llvm::MachO::version_min_command version_min;
5108 switch (load_cmd.cmd) {
5109 case llvm::MachO::LC_VERSION_MIN_MACOSX:
5110 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
5111 case llvm::MachO::LC_VERSION_MIN_TVOS:
5112 case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
5113 if (load_cmd.cmdsize != sizeof(version_min))
5114 break;
5115 if (data.ExtractBytes(offset: cmd_offset, length: sizeof(version_min),
5116 dst_byte_order: data.GetByteOrder(), dst: &version_min) == 0)
5117 break;
5118 MinOS min_os(version_min.version);
5119 llvm::SmallString<32> os_name;
5120 llvm::raw_svector_ostream os(os_name);
5121 os << GetOSName(cmd: load_cmd.cmd) << min_os.major_version << '.'
5122 << min_os.minor_version << '.' << min_os.patch_version;
5123
5124 auto triple = base_triple;
5125 triple.setOSName(os.str());
5126
5127 // Disambiguate legacy simulator platforms.
5128 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
5129 (base_triple.getArch() == llvm::Triple::x86_64 ||
5130 base_triple.getArch() == llvm::Triple::x86)) {
5131 // The combination of legacy LC_VERSION_MIN load command and
5132 // x86 architecture always indicates a simulator environment.
5133 // The combination of LC_VERSION_MIN and arm architecture only
5134 // appears for native binaries. Back-deploying simulator
5135 // binaries on Apple Silicon Macs use the modern unambigous
5136 // LC_BUILD_VERSION load commands; no special handling required.
5137 triple.setEnvironment(llvm::Triple::Simulator);
5138 }
5139 add_triple(triple);
5140 break;
5141 }
5142 default:
5143 break;
5144 }
5145
5146 offset = cmd_offset + load_cmd.cmdsize;
5147 }
5148
5149 // See if there are LC_BUILD_VERSION load commands that can give
5150 // us the OS type.
5151 offset = lc_offset;
5152 for (uint32_t i = 0; i < header.ncmds; ++i) {
5153 const lldb::offset_t cmd_offset = offset;
5154 if (data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
5155 break;
5156
5157 do {
5158 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
5159 llvm::MachO::build_version_command build_version;
5160 if (load_cmd.cmdsize < sizeof(build_version)) {
5161 // Malformed load command.
5162 break;
5163 }
5164 if (data.ExtractBytes(offset: cmd_offset, length: sizeof(build_version),
5165 dst_byte_order: data.GetByteOrder(), dst: &build_version) == 0)
5166 break;
5167 MinOS min_os(build_version.minos);
5168 OSEnv os_env(build_version.platform);
5169 llvm::SmallString<16> os_name;
5170 llvm::raw_svector_ostream os(os_name);
5171 os << os_env.os_type << min_os.major_version << '.'
5172 << min_os.minor_version << '.' << min_os.patch_version;
5173 auto triple = base_triple;
5174 triple.setOSName(os.str());
5175 os_name.clear();
5176 if (!os_env.environment.empty())
5177 triple.setEnvironmentName(os_env.environment);
5178 add_triple(triple);
5179 }
5180 } while (false);
5181 offset = cmd_offset + load_cmd.cmdsize;
5182 }
5183
5184 if (!found_any) {
5185 add_triple(base_triple);
5186 }
5187}
5188
5189ArchSpec ObjectFileMachO::GetArchitecture(
5190 ModuleSP module_sp, const llvm::MachO::mach_header &header,
5191 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5192 ModuleSpecList all_specs;
5193 ModuleSpec base_spec;
5194 GetAllArchSpecs(header, data, lc_offset: MachHeaderSizeFromMagic(magic: header.magic),
5195 base_spec, all_specs);
5196
5197 // If the object file offers multiple alternative load commands,
5198 // pick the one that matches the module.
5199 if (module_sp) {
5200 const ArchSpec &module_arch = module_sp->GetArchitecture();
5201 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
5202 ArchSpec mach_arch =
5203 all_specs.GetModuleSpecRefAtIndex(i).GetArchitecture();
5204 if (module_arch.IsCompatibleMatch(rhs: mach_arch))
5205 return mach_arch;
5206 }
5207 }
5208
5209 // Return the first arch we found.
5210 if (all_specs.GetSize() == 0)
5211 return {};
5212 return all_specs.GetModuleSpecRefAtIndex(i: 0).GetArchitecture();
5213}
5214
5215UUID ObjectFileMachO::GetUUID() {
5216 ModuleSP module_sp(GetModule());
5217 if (module_sp) {
5218 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5219 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5220 return GetUUID(header: m_header, data: m_data, lc_offset: offset);
5221 }
5222 return UUID();
5223}
5224
5225uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) {
5226 ModuleSP module_sp = GetModule();
5227 if (!module_sp)
5228 return 0;
5229
5230 uint32_t count = 0;
5231 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5232 llvm::MachO::load_command load_cmd;
5233 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5234 std::vector<std::string> rpath_paths;
5235 std::vector<std::string> rpath_relative_paths;
5236 std::vector<std::string> at_exec_relative_paths;
5237 uint32_t i;
5238 for (i = 0; i < m_header.ncmds; ++i) {
5239 const uint32_t cmd_offset = offset;
5240 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
5241 break;
5242
5243 switch (load_cmd.cmd) {
5244 case LC_RPATH:
5245 case LC_LOAD_DYLIB:
5246 case LC_LOAD_WEAK_DYLIB:
5247 case LC_REEXPORT_DYLIB:
5248 case LC_LOAD_DYLINKER:
5249 case LC_LOADFVMLIB:
5250 case LC_LOAD_UPWARD_DYLIB: {
5251 uint32_t name_offset = cmd_offset + m_data.GetU32(offset_ptr: &offset);
5252 // For LC_LOAD_DYLIB there is an alternate encoding
5253 // which adds a uint32_t `flags` field for `DYLD_USE_*`
5254 // flags. This can be detected by a timestamp field with
5255 // the `DYLIB_USE_MARKER` constant value.
5256 bool is_delayed_init = false;
5257 uint32_t use_command_marker = m_data.GetU32(offset_ptr: &offset);
5258 if (use_command_marker == 0x1a741800 /* DYLIB_USE_MARKER */) {
5259 offset += 4; /* uint32_t current_version */
5260 offset += 4; /* uint32_t compat_version */
5261 uint32_t flags = m_data.GetU32(offset_ptr: &offset);
5262 // If this LC_LOAD_DYLIB is marked delay-init,
5263 // don't report it as a dependent library -- it
5264 // may be loaded in the process at some point,
5265 // but will most likely not be load at launch.
5266 if (flags & 0x08 /* DYLIB_USE_DELAYED_INIT */)
5267 is_delayed_init = true;
5268 }
5269 const char *path = m_data.PeekCStr(offset: name_offset);
5270 if (path && !is_delayed_init) {
5271 if (load_cmd.cmd == LC_RPATH)
5272 rpath_paths.push_back(x: path);
5273 else {
5274 if (path[0] == '@') {
5275 if (strncmp(s1: path, s2: "@rpath", n: strlen(s: "@rpath")) == 0)
5276 rpath_relative_paths.push_back(x: path + strlen(s: "@rpath"));
5277 else if (strncmp(s1: path, s2: "@executable_path",
5278 n: strlen(s: "@executable_path")) == 0)
5279 at_exec_relative_paths.push_back(x: path +
5280 strlen(s: "@executable_path"));
5281 } else {
5282 FileSpec file_spec(path);
5283 if (files.AppendIfUnique(file: file_spec))
5284 count++;
5285 }
5286 }
5287 }
5288 } break;
5289
5290 default:
5291 break;
5292 }
5293 offset = cmd_offset + load_cmd.cmdsize;
5294 }
5295
5296 FileSpec this_file_spec(m_file);
5297 FileSystem::Instance().Resolve(file_spec&: this_file_spec);
5298
5299 if (!rpath_paths.empty()) {
5300 // Fixup all LC_RPATH values to be absolute paths.
5301 const std::string this_directory =
5302 this_file_spec.GetDirectory().GetString();
5303 for (auto &rpath : rpath_paths) {
5304 if (llvm::StringRef(rpath).starts_with(Prefix: g_loader_path))
5305 rpath = this_directory + rpath.substr(pos: g_loader_path.size());
5306 else if (llvm::StringRef(rpath).starts_with(Prefix: g_executable_path))
5307 rpath = this_directory + rpath.substr(pos: g_executable_path.size());
5308 }
5309
5310 for (const auto &rpath_relative_path : rpath_relative_paths) {
5311 for (const auto &rpath : rpath_paths) {
5312 std::string path = rpath;
5313 path += rpath_relative_path;
5314 // It is OK to resolve this path because we must find a file on disk
5315 // for us to accept it anyway if it is rpath relative.
5316 FileSpec file_spec(path);
5317 FileSystem::Instance().Resolve(file_spec);
5318 if (FileSystem::Instance().Exists(file_spec) &&
5319 files.AppendIfUnique(file: file_spec)) {
5320 count++;
5321 break;
5322 }
5323 }
5324 }
5325 }
5326
5327 // We may have @executable_paths but no RPATHS. Figure those out here.
5328 // Only do this if this object file is the executable. We have no way to
5329 // get back to the actual executable otherwise, so we won't get the right
5330 // path.
5331 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5332 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5333 for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5334 FileSpec file_spec =
5335 exec_dir.CopyByAppendingPathComponent(component: at_exec_relative_path);
5336 if (FileSystem::Instance().Exists(file_spec) &&
5337 files.AppendIfUnique(file: file_spec))
5338 count++;
5339 }
5340 }
5341 return count;
5342}
5343
5344lldb_private::Address ObjectFileMachO::GetEntryPointAddress() {
5345 // If the object file is not an executable it can't hold the entry point.
5346 // m_entry_point_address is initialized to an invalid address, so we can just
5347 // return that. If m_entry_point_address is valid it means we've found it
5348 // already, so return the cached value.
5349
5350 if ((!IsExecutable() && !IsDynamicLoader()) ||
5351 m_entry_point_address.IsValid()) {
5352 return m_entry_point_address;
5353 }
5354
5355 // Otherwise, look for the UnixThread or Thread command. The data for the
5356 // Thread command is given in /usr/include/mach-o.h, but it is basically:
5357 //
5358 // uint32_t flavor - this is the flavor argument you would pass to
5359 // thread_get_state
5360 // uint32_t count - this is the count of longs in the thread state data
5361 // struct XXX_thread_state state - this is the structure from
5362 // <machine/thread_status.h> corresponding to the flavor.
5363 // <repeat this trio>
5364 //
5365 // So we just keep reading the various register flavors till we find the GPR
5366 // one, then read the PC out of there.
5367 // FIXME: We will need to have a "RegisterContext data provider" class at some
5368 // point that can get all the registers
5369 // out of data in this form & attach them to a given thread. That should
5370 // underlie the MacOS X User process plugin, and we'll also need it for the
5371 // MacOS X Core File process plugin. When we have that we can also use it
5372 // here.
5373 //
5374 // For now we hard-code the offsets and flavors we need:
5375 //
5376 //
5377
5378 ModuleSP module_sp(GetModule());
5379 if (module_sp) {
5380 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5381 llvm::MachO::load_command load_cmd;
5382 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5383 uint32_t i;
5384 lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
5385 bool done = false;
5386
5387 for (i = 0; i < m_header.ncmds; ++i) {
5388 const lldb::offset_t cmd_offset = offset;
5389 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
5390 break;
5391
5392 switch (load_cmd.cmd) {
5393 case LC_UNIXTHREAD:
5394 case LC_THREAD: {
5395 while (offset < cmd_offset + load_cmd.cmdsize) {
5396 uint32_t flavor = m_data.GetU32(offset_ptr: &offset);
5397 uint32_t count = m_data.GetU32(offset_ptr: &offset);
5398 if (count == 0) {
5399 // We've gotten off somehow, log and exit;
5400 return m_entry_point_address;
5401 }
5402
5403 switch (m_header.cputype) {
5404 case llvm::MachO::CPU_TYPE_ARM:
5405 if (flavor == 1 ||
5406 flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5407 // from mach/arm/thread_status.h
5408 {
5409 offset += 60; // This is the offset of pc in the GPR thread state
5410 // data structure.
5411 start_address = m_data.GetU32(offset_ptr: &offset);
5412 done = true;
5413 }
5414 break;
5415 case llvm::MachO::CPU_TYPE_ARM64:
5416 case llvm::MachO::CPU_TYPE_ARM64_32:
5417 if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5418 {
5419 offset += 256; // This is the offset of pc in the GPR thread state
5420 // data structure.
5421 start_address = m_data.GetU64(offset_ptr: &offset);
5422 done = true;
5423 }
5424 break;
5425 case llvm::MachO::CPU_TYPE_I386:
5426 if (flavor ==
5427 1) // x86_THREAD_STATE32 from mach/i386/thread_status.h
5428 {
5429 offset += 40; // This is the offset of eip in the GPR thread state
5430 // data structure.
5431 start_address = m_data.GetU32(offset_ptr: &offset);
5432 done = true;
5433 }
5434 break;
5435 case llvm::MachO::CPU_TYPE_X86_64:
5436 if (flavor ==
5437 4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5438 {
5439 offset += 16 * 8; // This is the offset of rip in the GPR thread
5440 // state data structure.
5441 start_address = m_data.GetU64(offset_ptr: &offset);
5442 done = true;
5443 }
5444 break;
5445 default:
5446 return m_entry_point_address;
5447 }
5448 // Haven't found the GPR flavor yet, skip over the data for this
5449 // flavor:
5450 if (done)
5451 break;
5452 offset += count * 4;
5453 }
5454 } break;
5455 case LC_MAIN: {
5456 uint64_t entryoffset = m_data.GetU64(offset_ptr: &offset);
5457 SectionSP text_segment_sp =
5458 GetSectionList()->FindSectionByName(section_dstr: GetSegmentNameTEXT());
5459 if (text_segment_sp) {
5460 done = true;
5461 start_address = text_segment_sp->GetFileAddress() + entryoffset;
5462 }
5463 } break;
5464
5465 default:
5466 break;
5467 }
5468 if (done)
5469 break;
5470
5471 // Go to the next load command:
5472 offset = cmd_offset + load_cmd.cmdsize;
5473 }
5474
5475 if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5476 if (GetSymtab()) {
5477 Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5478 name: ConstString("_dyld_start"), symbol_type: SymbolType::eSymbolTypeCode,
5479 symbol_debug_type: Symtab::eDebugAny, symbol_visibility: Symtab::eVisibilityAny);
5480 if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5481 start_address = dyld_start_sym->GetAddress().GetFileAddress();
5482 }
5483 }
5484 }
5485
5486 if (start_address != LLDB_INVALID_ADDRESS) {
5487 // We got the start address from the load commands, so now resolve that
5488 // address in the sections of this ObjectFile:
5489 if (!m_entry_point_address.ResolveAddressUsingFileSections(
5490 addr: start_address, sections: GetSectionList())) {
5491 m_entry_point_address.Clear();
5492 }
5493 } else {
5494 // We couldn't read the UnixThread load command - maybe it wasn't there.
5495 // As a fallback look for the "start" symbol in the main executable.
5496
5497 ModuleSP module_sp(GetModule());
5498
5499 if (module_sp) {
5500 SymbolContextList contexts;
5501 SymbolContext context;
5502 module_sp->FindSymbolsWithNameAndType(name: ConstString("start"),
5503 symbol_type: eSymbolTypeCode, sc_list&: contexts);
5504 if (contexts.GetSize()) {
5505 if (contexts.GetContextAtIndex(idx: 0, sc&: context))
5506 m_entry_point_address = context.symbol->GetAddress();
5507 }
5508 }
5509 }
5510 }
5511
5512 return m_entry_point_address;
5513}
5514
5515lldb_private::Address ObjectFileMachO::GetBaseAddress() {
5516 lldb_private::Address header_addr;
5517 SectionList *section_list = GetSectionList();
5518 if (section_list) {
5519 SectionSP text_segment_sp(
5520 section_list->FindSectionByName(section_dstr: GetSegmentNameTEXT()));
5521 if (text_segment_sp) {
5522 header_addr.SetSection(text_segment_sp);
5523 header_addr.SetOffset(0);
5524 }
5525 }
5526 return header_addr;
5527}
5528
5529uint32_t ObjectFileMachO::GetNumThreadContexts() {
5530 ModuleSP module_sp(GetModule());
5531 if (module_sp) {
5532 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5533 if (!m_thread_context_offsets_valid) {
5534 m_thread_context_offsets_valid = true;
5535 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5536 FileRangeArray::Entry file_range;
5537 llvm::MachO::thread_command thread_cmd;
5538 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5539 const uint32_t cmd_offset = offset;
5540 if (m_data.GetU32(offset_ptr: &offset, dst: &thread_cmd, count: 2) == nullptr)
5541 break;
5542
5543 if (thread_cmd.cmd == LC_THREAD) {
5544 file_range.SetRangeBase(offset);
5545 file_range.SetByteSize(thread_cmd.cmdsize - 8);
5546 m_thread_context_offsets.Append(entry: file_range);
5547 }
5548 offset = cmd_offset + thread_cmd.cmdsize;
5549 }
5550 }
5551 }
5552 return m_thread_context_offsets.GetSize();
5553}
5554
5555std::vector<std::tuple<offset_t, offset_t>>
5556ObjectFileMachO::FindLC_NOTEByName(std::string name) {
5557 std::vector<std::tuple<offset_t, offset_t>> results;
5558 ModuleSP module_sp(GetModule());
5559 if (module_sp) {
5560 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5561
5562 offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5563 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5564 const uint32_t cmd_offset = offset;
5565 llvm::MachO::load_command lc = {};
5566 if (m_data.GetU32(offset_ptr: &offset, dst: &lc.cmd, count: 2) == nullptr)
5567 break;
5568 if (lc.cmd == LC_NOTE) {
5569 char data_owner[17];
5570 m_data.CopyData(offset, length: 16, dst: data_owner);
5571 data_owner[16] = '\0';
5572 offset += 16;
5573
5574 if (name == data_owner) {
5575 offset_t payload_offset = m_data.GetU64_unchecked(offset_ptr: &offset);
5576 offset_t payload_size = m_data.GetU64_unchecked(offset_ptr: &offset);
5577 results.push_back(x: {payload_offset, payload_size});
5578 }
5579 }
5580 offset = cmd_offset + lc.cmdsize;
5581 }
5582 }
5583 return results;
5584}
5585
5586std::string ObjectFileMachO::GetIdentifierString() {
5587 Log *log(
5588 GetLog(mask: LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader));
5589 ModuleSP module_sp(GetModule());
5590 if (module_sp) {
5591 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5592
5593 auto lc_notes = FindLC_NOTEByName(name: "kern ver str");
5594 for (auto lc_note : lc_notes) {
5595 offset_t payload_offset = std::get<0>(t&: lc_note);
5596 offset_t payload_size = std::get<1>(t&: lc_note);
5597 uint32_t version;
5598 if (m_data.GetU32(offset_ptr: &payload_offset, dst: &version, count: 1) != nullptr) {
5599 if (version == 1) {
5600 uint32_t strsize = payload_size - sizeof(uint32_t);
5601 std::string result(strsize, '\0');
5602 m_data.CopyData(offset: payload_offset, length: strsize, dst: result.data());
5603 LLDB_LOGF(log, "LC_NOTE 'kern ver str' found with text '%s'",
5604 result.c_str());
5605 return result;
5606 }
5607 }
5608 }
5609
5610 // Second, make a pass over the load commands looking for an obsolete
5611 // LC_IDENT load command.
5612 offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
5613 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5614 const uint32_t cmd_offset = offset;
5615 llvm::MachO::ident_command ident_command;
5616 if (m_data.GetU32(offset_ptr: &offset, dst: &ident_command, count: 2) == nullptr)
5617 break;
5618 if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
5619 std::string result(ident_command.cmdsize, '\0');
5620 if (m_data.CopyData(offset, length: ident_command.cmdsize, dst: result.data()) ==
5621 ident_command.cmdsize) {
5622 LLDB_LOGF(log, "LC_IDENT found with text '%s'", result.c_str());
5623 return result;
5624 }
5625 }
5626 offset = cmd_offset + ident_command.cmdsize;
5627 }
5628 }
5629 return {};
5630}
5631
5632AddressableBits ObjectFileMachO::GetAddressableBits() {
5633 AddressableBits addressable_bits;
5634
5635 Log *log(GetLog(mask: LLDBLog::Process));
5636 ModuleSP module_sp(GetModule());
5637 if (module_sp) {
5638 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5639 auto lc_notes = FindLC_NOTEByName(name: "addrable bits");
5640 for (auto lc_note : lc_notes) {
5641 offset_t payload_offset = std::get<0>(t&: lc_note);
5642 uint32_t version;
5643 if (m_data.GetU32(offset_ptr: &payload_offset, dst: &version, count: 1) != nullptr) {
5644 if (version == 3) {
5645 uint32_t num_addr_bits = m_data.GetU32_unchecked(offset_ptr: &payload_offset);
5646 addressable_bits.SetAddressableBits(num_addr_bits);
5647 LLDB_LOGF(log,
5648 "LC_NOTE 'addrable bits' v3 found, value %d "
5649 "bits",
5650 num_addr_bits);
5651 }
5652 if (version == 4) {
5653 uint32_t lo_addr_bits = m_data.GetU32_unchecked(offset_ptr: &payload_offset);
5654 uint32_t hi_addr_bits = m_data.GetU32_unchecked(offset_ptr: &payload_offset);
5655
5656 if (lo_addr_bits == hi_addr_bits)
5657 addressable_bits.SetAddressableBits(lo_addr_bits);
5658 else
5659 addressable_bits.SetAddressableBits(lowmem_addressing_bits: lo_addr_bits, highmem_addressing_bits: hi_addr_bits);
5660 LLDB_LOGF(log, "LC_NOTE 'addrable bits' v4 found, value %d & %d bits",
5661 lo_addr_bits, hi_addr_bits);
5662 }
5663 }
5664 }
5665 }
5666 return addressable_bits;
5667}
5668
5669bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value,
5670 bool &value_is_offset,
5671 UUID &uuid,
5672 ObjectFile::BinaryType &type) {
5673 Log *log(
5674 GetLog(mask: LLDBLog::Symbols | LLDBLog::Process | LLDBLog::DynamicLoader));
5675 value = LLDB_INVALID_ADDRESS;
5676 value_is_offset = false;
5677 uuid.Clear();
5678 uint32_t log2_pagesize = 0; // not currently passed up to caller
5679 uint32_t platform = 0; // not currently passed up to caller
5680 ModuleSP module_sp(GetModule());
5681 if (module_sp) {
5682 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5683
5684 auto lc_notes = FindLC_NOTEByName(name: "main bin spec");
5685 for (auto lc_note : lc_notes) {
5686 offset_t payload_offset = std::get<0>(t&: lc_note);
5687
5688 // struct main_bin_spec
5689 // {
5690 // uint32_t version; // currently 2
5691 // uint32_t type; // 0 == unspecified,
5692 // // 1 == kernel
5693 // // 2 == user process,
5694 // dyld mach-o binary addr
5695 // // 3 == standalone binary
5696 // // 4 == user process,
5697 // // dyld_all_image_infos addr
5698 // uint64_t address; // UINT64_MAX if address not specified
5699 // uint64_t slide; // slide, UINT64_MAX if unspecified
5700 // // 0 if no slide needs to be applied to
5701 // // file address
5702 // uuid_t uuid; // all zero's if uuid not specified
5703 // uint32_t log2_pagesize; // process page size in log base 2,
5704 // // e.g. 4k pages are 12.
5705 // // 0 for unspecified
5706 // uint32_t platform; // The Mach-O platform for this corefile.
5707 // // 0 for unspecified.
5708 // // The values are defined in
5709 // // <mach-o/loader.h>, PLATFORM_*.
5710 // } __attribute((packed));
5711
5712 // "main bin spec" (main binary specification) data payload is
5713 // formatted:
5714 // uint32_t version [currently 1]
5715 // uint32_t type [0 == unspecified, 1 == kernel,
5716 // 2 == user process, 3 == firmware ]
5717 // uint64_t address [ UINT64_MAX if address not specified ]
5718 // uuid_t uuid [ all zero's if uuid not specified ]
5719 // uint32_t log2_pagesize [ process page size in log base
5720 // 2, e.g. 4k pages are 12.
5721 // 0 for unspecified ]
5722 // uint32_t unused [ for alignment ]
5723
5724 uint32_t version;
5725 if (m_data.GetU32(offset_ptr: &payload_offset, dst: &version, count: 1) != nullptr &&
5726 version <= 2) {
5727 uint32_t binspec_type = 0;
5728 uuid_t raw_uuid;
5729 memset(s: raw_uuid, c: 0, n: sizeof(uuid_t));
5730
5731 if (!m_data.GetU32(offset_ptr: &payload_offset, dst: &binspec_type, count: 1))
5732 return false;
5733 if (!m_data.GetU64(offset_ptr: &payload_offset, dst: &value, count: 1))
5734 return false;
5735 uint64_t slide = LLDB_INVALID_ADDRESS;
5736 if (version > 1 && !m_data.GetU64(offset_ptr: &payload_offset, dst: &slide, count: 1))
5737 return false;
5738 if (value == LLDB_INVALID_ADDRESS && slide != LLDB_INVALID_ADDRESS) {
5739 value = slide;
5740 value_is_offset = true;
5741 }
5742
5743 if (m_data.CopyData(offset: payload_offset, length: sizeof(uuid_t), dst: raw_uuid) != 0) {
5744 uuid = UUID(raw_uuid, sizeof(uuid_t));
5745 // convert the "main bin spec" type into our
5746 // ObjectFile::BinaryType enum
5747 const char *typestr = "unrecognized type";
5748 type = eBinaryTypeInvalid;
5749 switch (binspec_type) {
5750 case 0:
5751 type = eBinaryTypeUnknown;
5752 typestr = "uknown";
5753 break;
5754 case 1:
5755 type = eBinaryTypeKernel;
5756 typestr = "xnu kernel";
5757 break;
5758 case 2:
5759 type = eBinaryTypeUser;
5760 typestr = "userland dyld";
5761 break;
5762 case 3:
5763 type = eBinaryTypeStandalone;
5764 typestr = "standalone";
5765 break;
5766 case 4:
5767 type = eBinaryTypeUserAllImageInfos;
5768 typestr = "userland dyld_all_image_infos";
5769 break;
5770 }
5771 LLDB_LOGF(log,
5772 "LC_NOTE 'main bin spec' found, version %d type %d "
5773 "(%s), value 0x%" PRIx64 " value-is-slide==%s uuid %s",
5774 version, type, typestr, value,
5775 value_is_offset ? "true" : "false",
5776 uuid.GetAsString().c_str());
5777 if (!m_data.GetU32(offset_ptr: &payload_offset, dst: &log2_pagesize, count: 1))
5778 return false;
5779 if (version > 1 && !m_data.GetU32(offset_ptr: &payload_offset, dst: &platform, count: 1))
5780 return false;
5781 return true;
5782 }
5783 }
5784 }
5785 }
5786 return false;
5787}
5788
5789bool ObjectFileMachO::GetCorefileThreadExtraInfos(
5790 std::vector<lldb::tid_t> &tids) {
5791 tids.clear();
5792 ModuleSP module_sp(GetModule());
5793 if (module_sp) {
5794 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5795
5796 Log *log(GetLog(mask: LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread));
5797 auto lc_notes = FindLC_NOTEByName(name: "process metadata");
5798 for (auto lc_note : lc_notes) {
5799 offset_t payload_offset = std::get<0>(t&: lc_note);
5800 offset_t strsize = std::get<1>(t&: lc_note);
5801 std::string buf(strsize, '\0');
5802 if (m_data.CopyData(offset: payload_offset, length: strsize, dst: buf.data()) != strsize) {
5803 LLDB_LOGF(log,
5804 "Unable to read %" PRIu64
5805 " bytes of 'process metadata' LC_NOTE JSON contents",
5806 strsize);
5807 return false;
5808 }
5809 while (buf.back() == '\0')
5810 buf.resize(n: buf.size() - 1);
5811 StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(json_text: buf);
5812 StructuredData::Dictionary *dict = object_sp->GetAsDictionary();
5813 if (!dict) {
5814 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not "
5815 "get a dictionary.");
5816 return false;
5817 }
5818 StructuredData::Array *threads;
5819 if (!dict->GetValueForKeyAsArray(key: "threads", result&: threads) || !threads) {
5820 LLDB_LOGF(log,
5821 "'process metadata' LC_NOTE does not have a 'threads' key");
5822 return false;
5823 }
5824 if (threads->GetSize() != GetNumThreadContexts()) {
5825 LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, number of "
5826 "threads does not match number of LC_THREADS.");
5827 return false;
5828 }
5829 const size_t num_threads = threads->GetSize();
5830 for (size_t i = 0; i < num_threads; i++) {
5831 std::optional<StructuredData::Dictionary *> maybe_thread =
5832 threads->GetItemAtIndexAsDictionary(idx: i);
5833 if (!maybe_thread) {
5834 LLDB_LOGF(log,
5835 "Unable to read 'process metadata' LC_NOTE, threads "
5836 "array does not have a dictionary at index %zu.",
5837 i);
5838 return false;
5839 }
5840 StructuredData::Dictionary *thread = *maybe_thread;
5841 lldb::tid_t tid = LLDB_INVALID_THREAD_ID;
5842 if (thread->GetValueForKeyAsInteger<lldb::tid_t>(key: "thread_id", result&: tid))
5843 if (tid == 0)
5844 tid = LLDB_INVALID_THREAD_ID;
5845 tids.push_back(x: tid);
5846 }
5847
5848 if (log) {
5849 StreamString logmsg;
5850 logmsg.Printf(format: "LC_NOTE 'process metadata' found: ");
5851 dict->Dump(s&: logmsg, /* pretty_print */ false);
5852 LLDB_LOGF(log, "%s", logmsg.GetData());
5853 }
5854 return true;
5855 }
5856 }
5857 return false;
5858}
5859
5860lldb::RegisterContextSP
5861ObjectFileMachO::GetThreadContextAtIndex(uint32_t idx,
5862 lldb_private::Thread &thread) {
5863 lldb::RegisterContextSP reg_ctx_sp;
5864
5865 ModuleSP module_sp(GetModule());
5866 if (module_sp) {
5867 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5868 if (!m_thread_context_offsets_valid)
5869 GetNumThreadContexts();
5870
5871 const FileRangeArray::Entry *thread_context_file_range =
5872 m_thread_context_offsets.GetEntryAtIndex(i: idx);
5873 if (thread_context_file_range) {
5874
5875 DataExtractor data(m_data, thread_context_file_range->GetRangeBase(),
5876 thread_context_file_range->GetByteSize());
5877
5878 switch (m_header.cputype) {
5879 case llvm::MachO::CPU_TYPE_ARM64:
5880 case llvm::MachO::CPU_TYPE_ARM64_32:
5881 reg_ctx_sp =
5882 std::make_shared<RegisterContextDarwin_arm64_Mach>(args&: thread, args&: data);
5883 break;
5884
5885 case llvm::MachO::CPU_TYPE_ARM:
5886 reg_ctx_sp =
5887 std::make_shared<RegisterContextDarwin_arm_Mach>(args&: thread, args&: data);
5888 break;
5889
5890 case llvm::MachO::CPU_TYPE_I386:
5891 reg_ctx_sp =
5892 std::make_shared<RegisterContextDarwin_i386_Mach>(args&: thread, args&: data);
5893 break;
5894
5895 case llvm::MachO::CPU_TYPE_X86_64:
5896 reg_ctx_sp =
5897 std::make_shared<RegisterContextDarwin_x86_64_Mach>(args&: thread, args&: data);
5898 break;
5899
5900 case llvm::MachO::CPU_TYPE_RISCV:
5901 reg_ctx_sp =
5902 std::make_shared<RegisterContextDarwin_riscv32_Mach>(args&: thread, args&: data);
5903 break;
5904 }
5905 }
5906 }
5907 return reg_ctx_sp;
5908}
5909
5910ObjectFile::Type ObjectFileMachO::CalculateType() {
5911 switch (m_header.filetype) {
5912 case MH_OBJECT: // 0x1u
5913 if (GetAddressByteSize() == 4) {
5914 // 32 bit kexts are just object files, but they do have a valid
5915 // UUID load command.
5916 if (GetUUID()) {
5917 // this checking for the UUID load command is not enough we could
5918 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5919 // this is required of kexts
5920 if (m_strata == eStrataInvalid)
5921 m_strata = eStrataKernel;
5922 return eTypeSharedLibrary;
5923 }
5924 }
5925 return eTypeObjectFile;
5926
5927 case MH_EXECUTE:
5928 return eTypeExecutable; // 0x2u
5929 case MH_FVMLIB:
5930 return eTypeSharedLibrary; // 0x3u
5931 case MH_CORE:
5932 return eTypeCoreFile; // 0x4u
5933 case MH_PRELOAD:
5934 return eTypeSharedLibrary; // 0x5u
5935 case MH_DYLIB:
5936 return eTypeSharedLibrary; // 0x6u
5937 case MH_DYLINKER:
5938 return eTypeDynamicLinker; // 0x7u
5939 case MH_BUNDLE:
5940 return eTypeSharedLibrary; // 0x8u
5941 case MH_DYLIB_STUB:
5942 return eTypeStubLibrary; // 0x9u
5943 case MH_DSYM:
5944 return eTypeDebugInfo; // 0xAu
5945 case MH_KEXT_BUNDLE:
5946 return eTypeSharedLibrary; // 0xBu
5947 default:
5948 break;
5949 }
5950 return eTypeUnknown;
5951}
5952
5953ObjectFile::Strata ObjectFileMachO::CalculateStrata() {
5954 switch (m_header.filetype) {
5955 case MH_OBJECT: // 0x1u
5956 {
5957 // 32 bit kexts are just object files, but they do have a valid
5958 // UUID load command.
5959 if (GetUUID()) {
5960 // this checking for the UUID load command is not enough we could
5961 // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5962 // this is required of kexts
5963 if (m_type == eTypeInvalid)
5964 m_type = eTypeSharedLibrary;
5965
5966 return eStrataKernel;
5967 }
5968 }
5969 return eStrataUnknown;
5970
5971 case MH_EXECUTE: // 0x2u
5972 // Check for the MH_DYLDLINK bit in the flags
5973 if (m_header.flags & MH_DYLDLINK) {
5974 return eStrataUser;
5975 } else {
5976 SectionList *section_list = GetSectionList();
5977 if (section_list) {
5978 static ConstString g_kld_section_name("__KLD");
5979 if (section_list->FindSectionByName(section_dstr: g_kld_section_name))
5980 return eStrataKernel;
5981 }
5982 }
5983 return eStrataRawImage;
5984
5985 case MH_FVMLIB:
5986 return eStrataUser; // 0x3u
5987 case MH_CORE:
5988 return eStrataUnknown; // 0x4u
5989 case MH_PRELOAD:
5990 return eStrataRawImage; // 0x5u
5991 case MH_DYLIB:
5992 return eStrataUser; // 0x6u
5993 case MH_DYLINKER:
5994 return eStrataUser; // 0x7u
5995 case MH_BUNDLE:
5996 return eStrataUser; // 0x8u
5997 case MH_DYLIB_STUB:
5998 return eStrataUser; // 0x9u
5999 case MH_DSYM:
6000 return eStrataUnknown; // 0xAu
6001 case MH_KEXT_BUNDLE:
6002 return eStrataKernel; // 0xBu
6003 default:
6004 break;
6005 }
6006 return eStrataUnknown;
6007}
6008
6009llvm::VersionTuple ObjectFileMachO::GetVersion() {
6010 ModuleSP module_sp(GetModule());
6011 if (module_sp) {
6012 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
6013 llvm::MachO::dylib_command load_cmd;
6014 lldb::offset_t offset = MachHeaderSizeFromMagic(magic: m_header.magic);
6015 uint32_t version_cmd = 0;
6016 uint64_t version = 0;
6017 uint32_t i;
6018 for (i = 0; i < m_header.ncmds; ++i) {
6019 const lldb::offset_t cmd_offset = offset;
6020 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd, count: 2) == nullptr)
6021 break;
6022
6023 if (load_cmd.cmd == LC_ID_DYLIB) {
6024 if (version_cmd == 0) {
6025 version_cmd = load_cmd.cmd;
6026 if (m_data.GetU32(offset_ptr: &offset, dst: &load_cmd.dylib, count: 4) == nullptr)
6027 break;
6028 version = load_cmd.dylib.current_version;
6029 }
6030 break; // Break for now unless there is another more complete version
6031 // number load command in the future.
6032 }
6033 offset = cmd_offset + load_cmd.cmdsize;
6034 }
6035
6036 if (version_cmd == LC_ID_DYLIB) {
6037 unsigned major = (version & 0xFFFF0000ull) >> 16;
6038 unsigned minor = (version & 0x0000FF00ull) >> 8;
6039 unsigned subminor = (version & 0x000000FFull);
6040 return llvm::VersionTuple(major, minor, subminor);
6041 }
6042 }
6043 return llvm::VersionTuple();
6044}
6045
6046ArchSpec ObjectFileMachO::GetArchitecture() {
6047 ModuleSP module_sp(GetModule());
6048 ArchSpec arch;
6049 if (module_sp) {
6050 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
6051
6052 return GetArchitecture(module_sp, header: m_header, data: m_data,
6053 lc_offset: MachHeaderSizeFromMagic(magic: m_header.magic));
6054 }
6055 return arch;
6056}
6057
6058void ObjectFileMachO::GetProcessSharedCacheUUID(Process *process,
6059 addr_t &base_addr, UUID &uuid) {
6060 uuid.Clear();
6061 base_addr = LLDB_INVALID_ADDRESS;
6062 if (process && process->GetDynamicLoader()) {
6063 DynamicLoader *dl = process->GetDynamicLoader();
6064 LazyBool using_shared_cache;
6065 LazyBool private_shared_cache;
6066 dl->GetSharedCacheInformation(base_address&: base_addr, uuid, using_shared_cache,
6067 private_shared_cache);
6068 }
6069 Log *log(GetLog(mask: LLDBLog::Symbols | LLDBLog::Process));
6070 LLDB_LOGF(
6071 log,
6072 "inferior process shared cache has a UUID of %s, base address 0x%" PRIx64,
6073 uuid.GetAsString().c_str(), base_addr);
6074}
6075
6076// From dyld SPI header dyld_process_info.h
6077typedef void *dyld_process_info;
6078struct lldb_copy__dyld_process_cache_info {
6079 uuid_t cacheUUID; // UUID of cache used by process
6080 uint64_t cacheBaseAddress; // load address of dyld shared cache
6081 bool noCache; // process is running without a dyld cache
6082 bool privateCache; // process is using a private copy of its dyld cache
6083};
6084
6085// #including mach/mach.h pulls in machine.h & CPU_TYPE_ARM etc conflicts with
6086// llvm enum definitions llvm::MachO::CPU_TYPE_ARM turning them into compile
6087// errors. So we need to use the actual underlying types of task_t and
6088// kern_return_t below.
6089extern "C" unsigned int /*task_t*/ mach_task_self();
6090
6091void ObjectFileMachO::GetLLDBSharedCacheUUID(addr_t &base_addr, UUID &uuid) {
6092 uuid.Clear();
6093 base_addr = LLDB_INVALID_ADDRESS;
6094
6095#if defined(__APPLE__)
6096 uint8_t *(*dyld_get_all_image_infos)(void);
6097 dyld_get_all_image_infos =
6098 (uint8_t * (*)()) dlsym(RTLD_DEFAULT, "_dyld_get_all_image_infos");
6099 if (dyld_get_all_image_infos) {
6100 uint8_t *dyld_all_image_infos_address = dyld_get_all_image_infos();
6101 if (dyld_all_image_infos_address) {
6102 uint32_t *version = (uint32_t *)
6103 dyld_all_image_infos_address; // version <mach-o/dyld_images.h>
6104 if (*version >= 13) {
6105 uuid_t *sharedCacheUUID_address = 0;
6106 int wordsize = sizeof(uint8_t *);
6107 if (wordsize == 8) {
6108 sharedCacheUUID_address =
6109 (uuid_t *)((uint8_t *)dyld_all_image_infos_address +
6110 160); // sharedCacheUUID <mach-o/dyld_images.h>
6111 if (*version >= 15)
6112 base_addr =
6113 *(uint64_t
6114 *)((uint8_t *)dyld_all_image_infos_address +
6115 176); // sharedCacheBaseAddress <mach-o/dyld_images.h>
6116 } else {
6117 sharedCacheUUID_address =
6118 (uuid_t *)((uint8_t *)dyld_all_image_infos_address +
6119 84); // sharedCacheUUID <mach-o/dyld_images.h>
6120 if (*version >= 15) {
6121 base_addr = 0;
6122 base_addr =
6123 *(uint32_t
6124 *)((uint8_t *)dyld_all_image_infos_address +
6125 100); // sharedCacheBaseAddress <mach-o/dyld_images.h>
6126 }
6127 }
6128 uuid = UUID(sharedCacheUUID_address, sizeof(uuid_t));
6129 }
6130 }
6131 } else {
6132 // Exists in macOS 10.12 and later, iOS 10.0 and later - dyld SPI
6133 dyld_process_info (*dyld_process_info_create)(
6134 unsigned int /* task_t */ task, uint64_t timestamp,
6135 unsigned int /*kern_return_t*/ *kernelError);
6136 void (*dyld_process_info_get_cache)(void *info, void *cacheInfo);
6137 void (*dyld_process_info_release)(dyld_process_info info);
6138
6139 dyld_process_info_create = (void *(*)(unsigned int /* task_t */, uint64_t,
6140 unsigned int /*kern_return_t*/ *))
6141 dlsym(RTLD_DEFAULT, "_dyld_process_info_create");
6142 dyld_process_info_get_cache = (void (*)(void *, void *))dlsym(
6143 RTLD_DEFAULT, "_dyld_process_info_get_cache");
6144 dyld_process_info_release =
6145 (void (*)(void *))dlsym(RTLD_DEFAULT, "_dyld_process_info_release");
6146
6147 if (dyld_process_info_create && dyld_process_info_get_cache) {
6148 unsigned int /*kern_return_t */ kern_ret;
6149 dyld_process_info process_info =
6150 dyld_process_info_create(::mach_task_self(), 0, &kern_ret);
6151 if (process_info) {
6152 struct lldb_copy__dyld_process_cache_info sc_info;
6153 memset(&sc_info, 0, sizeof(struct lldb_copy__dyld_process_cache_info));
6154 dyld_process_info_get_cache(process_info, &sc_info);
6155 if (sc_info.cacheBaseAddress != 0) {
6156 base_addr = sc_info.cacheBaseAddress;
6157 uuid = UUID(sc_info.cacheUUID, sizeof(uuid_t));
6158 }
6159 dyld_process_info_release(process_info);
6160 }
6161 }
6162 }
6163 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
6164 if (log && uuid.IsValid())
6165 LLDB_LOGF(log,
6166 "lldb's in-memory shared cache has a UUID of %s base address of "
6167 "0x%" PRIx64,
6168 uuid.GetAsString().c_str(), base_addr);
6169#endif
6170}
6171
6172static llvm::VersionTuple FindMinimumVersionInfo(DataExtractor &data,
6173 lldb::offset_t offset,
6174 size_t ncmds) {
6175 for (size_t i = 0; i < ncmds; i++) {
6176 const lldb::offset_t load_cmd_offset = offset;
6177 llvm::MachO::load_command lc = {};
6178 if (data.GetU32(offset_ptr: &offset, dst: &lc.cmd, count: 2) == nullptr)
6179 break;
6180
6181 uint32_t version = 0;
6182 if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX ||
6183 lc.cmd == llvm::MachO::LC_VERSION_MIN_IPHONEOS ||
6184 lc.cmd == llvm::MachO::LC_VERSION_MIN_TVOS ||
6185 lc.cmd == llvm::MachO::LC_VERSION_MIN_WATCHOS) {
6186 // struct version_min_command {
6187 // uint32_t cmd; // LC_VERSION_MIN_*
6188 // uint32_t cmdsize;
6189 // uint32_t version; // X.Y.Z encoded in nibbles xxxx.yy.zz
6190 // uint32_t sdk;
6191 // };
6192 // We want to read version.
6193 version = data.GetU32(offset_ptr: &offset);
6194 } else if (lc.cmd == llvm::MachO::LC_BUILD_VERSION) {
6195 // struct build_version_command {
6196 // uint32_t cmd; // LC_BUILD_VERSION
6197 // uint32_t cmdsize;
6198 // uint32_t platform;
6199 // uint32_t minos; // X.Y.Z encoded in nibbles xxxx.yy.zz
6200 // uint32_t sdk;
6201 // uint32_t ntools;
6202 // };
6203 // We want to read minos.
6204 offset += sizeof(uint32_t); // Skip over platform
6205 version = data.GetU32(offset_ptr: &offset); // Extract minos
6206 }
6207
6208 if (version) {
6209 const uint32_t xxxx = version >> 16;
6210 const uint32_t yy = (version >> 8) & 0xffu;
6211 const uint32_t zz = version & 0xffu;
6212 if (xxxx)
6213 return llvm::VersionTuple(xxxx, yy, zz);
6214 }
6215 offset = load_cmd_offset + lc.cmdsize;
6216 }
6217 return llvm::VersionTuple();
6218}
6219
6220llvm::VersionTuple ObjectFileMachO::GetMinimumOSVersion() {
6221 if (!m_min_os_version)
6222 m_min_os_version = FindMinimumVersionInfo(
6223 data&: m_data, offset: MachHeaderSizeFromMagic(magic: m_header.magic), ncmds: m_header.ncmds);
6224 return *m_min_os_version;
6225}
6226
6227llvm::VersionTuple ObjectFileMachO::GetSDKVersion() {
6228 if (!m_sdk_versions)
6229 m_sdk_versions = FindMinimumVersionInfo(
6230 data&: m_data, offset: MachHeaderSizeFromMagic(magic: m_header.magic), ncmds: m_header.ncmds);
6231 return *m_sdk_versions;
6232}
6233
6234bool ObjectFileMachO::GetIsDynamicLinkEditor() {
6235 return m_header.filetype == llvm::MachO::MH_DYLINKER;
6236}
6237
6238bool ObjectFileMachO::CanTrustAddressRanges() {
6239 // Dsymutil guarantees that the .debug_aranges accelerator is complete and can
6240 // be trusted by LLDB.
6241 return m_header.filetype == llvm::MachO::MH_DSYM;
6242}
6243
6244bool ObjectFileMachO::AllowAssemblyEmulationUnwindPlans() {
6245 return m_allow_assembly_emulation_unwind_plans;
6246}
6247
6248Section *ObjectFileMachO::GetMachHeaderSection() {
6249 // Find the first address of the mach header which is the first non-zero file
6250 // sized section whose file offset is zero. This is the base file address of
6251 // the mach-o file which can be subtracted from the vmaddr of the other
6252 // segments found in memory and added to the load address
6253 ModuleSP module_sp = GetModule();
6254 if (!module_sp)
6255 return nullptr;
6256 SectionList *section_list = GetSectionList();
6257 if (!section_list)
6258 return nullptr;
6259
6260 // Some binaries can have a TEXT segment with a non-zero file offset.
6261 // Binaries in the shared cache are one example. Some hand-generated
6262 // binaries may not be laid out in the normal TEXT,DATA,LC_SYMTAB order
6263 // in the file, even though they're laid out correctly in vmaddr terms.
6264 SectionSP text_segment_sp =
6265 section_list->FindSectionByName(section_dstr: GetSegmentNameTEXT());
6266 if (text_segment_sp.get() && SectionIsLoadable(section: text_segment_sp.get()))
6267 return text_segment_sp.get();
6268
6269 const size_t num_sections = section_list->GetSize();
6270 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6271 Section *section = section_list->GetSectionAtIndex(idx: sect_idx).get();
6272 if (section->GetFileOffset() == 0 && SectionIsLoadable(section))
6273 return section;
6274 }
6275
6276 return nullptr;
6277}
6278
6279bool ObjectFileMachO::SectionIsLoadable(const Section *section) {
6280 if (!section)
6281 return false;
6282 if (section->IsThreadSpecific())
6283 return false;
6284 if (GetModule().get() != section->GetModule().get())
6285 return false;
6286 // firmware style binaries with llvm gcov segment do
6287 // not have that segment mapped into memory.
6288 if (section->GetName() == GetSegmentNameLLVM_COV()) {
6289 const Strata strata = GetStrata();
6290 if (strata == eStrataKernel || strata == eStrataRawImage)
6291 return false;
6292 }
6293 // Be careful with __LINKEDIT and __DWARF segments
6294 if (section->GetName() == GetSegmentNameLINKEDIT() ||
6295 section->GetName() == GetSegmentNameDWARF()) {
6296 // Only map __LINKEDIT and __DWARF if we have an in memory image and
6297 // this isn't a kernel binary like a kext or mach_kernel.
6298 const bool is_memory_image = (bool)m_process_wp.lock();
6299 const Strata strata = GetStrata();
6300 if (is_memory_image == false || strata == eStrataKernel)
6301 return false;
6302 }
6303 return true;
6304}
6305
6306lldb::addr_t ObjectFileMachO::CalculateSectionLoadAddressForMemoryImage(
6307 lldb::addr_t header_load_address, const Section *header_section,
6308 const Section *section) {
6309 ModuleSP module_sp = GetModule();
6310 if (module_sp && header_section && section &&
6311 header_load_address != LLDB_INVALID_ADDRESS) {
6312 lldb::addr_t file_addr = header_section->GetFileAddress();
6313 if (file_addr != LLDB_INVALID_ADDRESS && SectionIsLoadable(section))
6314 return section->GetFileAddress() - file_addr + header_load_address;
6315 }
6316 return LLDB_INVALID_ADDRESS;
6317}
6318
6319bool ObjectFileMachO::SetLoadAddress(Target &target, lldb::addr_t value,
6320 bool value_is_offset) {
6321 Log *log(GetLog(mask: LLDBLog::DynamicLoader));
6322 ModuleSP module_sp = GetModule();
6323 if (!module_sp)
6324 return false;
6325
6326 SectionList *section_list = GetSectionList();
6327 if (!section_list)
6328 return false;
6329
6330 size_t num_loaded_sections = 0;
6331 const size_t num_sections = section_list->GetSize();
6332
6333 // Warn if some top-level segments map to the same address. The binary may be
6334 // malformed.
6335 const bool warn_multiple = true;
6336
6337 if (log) {
6338 StreamString logmsg;
6339 logmsg << "ObjectFileMachO::SetLoadAddress ";
6340 if (GetFileSpec())
6341 logmsg << "path='" << GetFileSpec().GetPath() << "' ";
6342 if (GetUUID()) {
6343 logmsg << "uuid=" << GetUUID().GetAsString();
6344 }
6345 LLDB_LOGF(log, "%s", logmsg.GetData());
6346 }
6347 if (value_is_offset) {
6348 // "value" is an offset to apply to each top level segment
6349 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6350 // Iterate through the object file sections to find all of the
6351 // sections that size on disk (to avoid __PAGEZERO) and load them
6352 SectionSP section_sp(section_list->GetSectionAtIndex(idx: sect_idx));
6353 if (SectionIsLoadable(section: section_sp.get())) {
6354 LLDB_LOGF(log,
6355 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is "
6356 "0x%" PRIx64,
6357 section_sp->GetName().AsCString(),
6358 section_sp->GetFileAddress() + value);
6359 if (target.SetSectionLoadAddress(section: section_sp,
6360 load_addr: section_sp->GetFileAddress() + value,
6361 warn_multiple))
6362 ++num_loaded_sections;
6363 }
6364 }
6365 } else {
6366 // "value" is the new base address of the mach_header, adjust each
6367 // section accordingly
6368
6369 Section *mach_header_section = GetMachHeaderSection();
6370 if (mach_header_section) {
6371 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
6372 SectionSP section_sp(section_list->GetSectionAtIndex(idx: sect_idx));
6373
6374 lldb::addr_t section_load_addr =
6375 CalculateSectionLoadAddressForMemoryImage(
6376 header_load_address: value, header_section: mach_header_section, section: section_sp.get());
6377 if (section_load_addr != LLDB_INVALID_ADDRESS) {
6378 LLDB_LOGF(log,
6379 "ObjectFileMachO::SetLoadAddress segment '%s' load addr is "
6380 "0x%" PRIx64,
6381 section_sp->GetName().AsCString(), section_load_addr);
6382 if (target.SetSectionLoadAddress(section: section_sp, load_addr: section_load_addr,
6383 warn_multiple))
6384 ++num_loaded_sections;
6385 }
6386 }
6387 }
6388 }
6389 return num_loaded_sections > 0;
6390}
6391
// Fixed-size header at the start of the "all image infos" LC_NOTE payload.
// The field order is the order in which the values are written to the
// corefile.
struct all_image_infos_header {
  // Format version; currently 1.
  uint32_t version;
  // Number of binary images described by the payload.
  uint32_t imgcount;
  // File offset in the corefile of where the array of struct entry's begin.
  uint64_t entries_fileoff;
  // Size of 'struct entry'.
  uint32_t entries_size;
  uint32_t unused;
};
6400
6401struct image_entry {
6402 uint64_t filepath_offset; // offset in corefile to c-string of the file path,
6403 // UINT64_MAX if unavailable.
6404 uuid_t uuid; // uint8_t[16]. should be set to all zeroes if
6405 // uuid is unknown.
6406 uint64_t load_address; // UINT64_MAX if unknown.
6407 uint64_t seg_addrs_offset; // offset to the array of struct segment_vmaddr's.
6408 uint32_t segment_count; // The number of segments for this binary.
6409 uint32_t unused;
6410
6411 image_entry() {
6412 filepath_offset = UINT64_MAX;
6413 memset(s: &uuid, c: 0, n: sizeof(uuid_t));
6414 segment_count = 0;
6415 load_address = UINT64_MAX;
6416 seg_addrs_offset = UINT64_MAX;
6417 unused = 0;
6418 }
6419 image_entry(const image_entry &rhs) {
6420 filepath_offset = rhs.filepath_offset;
6421 memcpy(dest: &uuid, src: &rhs.uuid, n: sizeof(uuid_t));
6422 segment_count = rhs.segment_count;
6423 seg_addrs_offset = rhs.seg_addrs_offset;
6424 load_address = rhs.load_address;
6425 unused = rhs.unused;
6426 }
6427};
6428
// Name and load address of one segment of a binary image, as stored in the
// "all image infos" LC_NOTE payload.
struct segment_vmaddr {
  // Fixed 16-byte name field, copied as raw bytes.
  char segname[16];
  // UINT64_MAX until a real address is assigned.
  uint64_t vmaddr;
  uint64_t unused;

  // Start out with an all-zero name and no address.
  segment_vmaddr() : vmaddr(UINT64_MAX), unused(0) {
    memset(&segname, 0, 16);
  }

  // Member-wise copy; the name is copied as 16 raw bytes.
  segment_vmaddr(const segment_vmaddr &rhs)
      : vmaddr(rhs.vmaddr), unused(rhs.unused) {
    memcpy(&segname, &rhs.segname, 16);
  }
};
6445
6446// Write the payload for the "all image infos" LC_NOTE into
6447// the supplied all_image_infos_payload, assuming that this
6448// will be written into the corefile starting at
6449// initial_file_offset.
6450//
6451// The placement of this payload is a little tricky. We're
6452// laying this out as
6453//
6454// 1. header (struct all_image_info_header)
6455// 2. Array of fixed-size (struct image_entry)'s, one
6456// per binary image present in the process.
6457// 3. Arrays of (struct segment_vmaddr)'s, a varying number
6458// for each binary image.
6459// 4. Variable length c-strings of binary image filepaths,
6460// one per binary.
6461//
6462// To compute where everything will be laid out in the
6463// payload, we need to iterate over the images and calculate
6464// how many segment_vmaddr structures each image will need,
6465// and how long each image's filepath c-string is. There
6466// are some multiple passes over the image list while calculating
6467// everything.
6468
6469static offset_t
6470CreateAllImageInfosPayload(const lldb::ProcessSP &process_sp,
6471 offset_t initial_file_offset,
6472 StreamString &all_image_infos_payload,
6473 lldb_private::SaveCoreOptions &options) {
6474 Target &target = process_sp->GetTarget();
6475 ModuleList modules = target.GetImages();
6476
6477 // stack-only corefiles have no reason to include binaries that
6478 // are not executing; we're trying to make the smallest corefile
6479 // we can, so leave the rest out.
6480 if (options.GetStyle() == SaveCoreStyle::eSaveCoreStackOnly)
6481 modules.Clear();
6482
6483 std::set<std::string> executing_uuids;
6484 std::vector<ThreadSP> thread_list =
6485 process_sp->CalculateCoreFileThreadList(core_options: options);
6486 for (const ThreadSP &thread_sp : thread_list) {
6487 uint32_t stack_frame_count = thread_sp->GetStackFrameCount();
6488 for (uint32_t j = 0; j < stack_frame_count; j++) {
6489 StackFrameSP stack_frame_sp = thread_sp->GetStackFrameAtIndex(idx: j);
6490 Address pc = stack_frame_sp->GetFrameCodeAddress();
6491 ModuleSP module_sp = pc.GetModule();
6492 if (module_sp) {
6493 UUID uuid = module_sp->GetUUID();
6494 if (uuid.IsValid()) {
6495 executing_uuids.insert(x: uuid.GetAsString());
6496 modules.AppendIfNeeded(new_module: module_sp);
6497 }
6498 }
6499 }
6500 }
6501 size_t modules_count = modules.GetSize();
6502
6503 struct all_image_infos_header infos;
6504 infos.version = 1;
6505 infos.imgcount = modules_count;
6506 infos.entries_size = sizeof(image_entry);
6507 infos.entries_fileoff = initial_file_offset + sizeof(all_image_infos_header);
6508 infos.unused = 0;
6509
6510 all_image_infos_payload.PutHex32(uvalue: infos.version);
6511 all_image_infos_payload.PutHex32(uvalue: infos.imgcount);
6512 all_image_infos_payload.PutHex64(uvalue: infos.entries_fileoff);
6513 all_image_infos_payload.PutHex32(uvalue: infos.entries_size);
6514 all_image_infos_payload.PutHex32(uvalue: infos.unused);
6515
6516 // First create the structures for all of the segment name+vmaddr vectors
6517 // for each module, so we will know the size of them as we add the
6518 // module entries.
6519 std::vector<std::vector<segment_vmaddr>> modules_segment_vmaddrs;
6520 for (size_t i = 0; i < modules_count; i++) {
6521 ModuleSP module = modules.GetModuleAtIndex(idx: i);
6522
6523 SectionList *sections = module->GetSectionList();
6524 size_t sections_count = sections->GetSize();
6525 std::vector<segment_vmaddr> segment_vmaddrs;
6526 for (size_t j = 0; j < sections_count; j++) {
6527 SectionSP section = sections->GetSectionAtIndex(idx: j);
6528 if (!section->GetParent().get()) {
6529 addr_t vmaddr = section->GetLoadBaseAddress(target: &target);
6530 if (vmaddr == LLDB_INVALID_ADDRESS)
6531 continue;
6532 ConstString name = section->GetName();
6533 segment_vmaddr seg_vmaddr;
6534 // This is the uncommon case where strncpy is exactly
6535 // the right one, doesn't need to be nul terminated.
6536 // The segment name in a Mach-O LC_SEGMENT/LC_SEGMENT_64 is char[16] and
6537 // is not guaranteed to be nul-terminated if all 16 characters are
6538 // used.
6539 // coverity[buffer_size_warning]
6540 strncpy(dest: seg_vmaddr.segname, src: name.AsCString(),
6541 n: sizeof(seg_vmaddr.segname));
6542 seg_vmaddr.vmaddr = vmaddr;
6543 seg_vmaddr.unused = 0;
6544 segment_vmaddrs.push_back(x: seg_vmaddr);
6545 }
6546 }
6547 modules_segment_vmaddrs.push_back(x: segment_vmaddrs);
6548 }
6549
6550 offset_t size_of_vmaddr_structs = 0;
6551 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) {
6552 size_of_vmaddr_structs +=
6553 modules_segment_vmaddrs[i].size() * sizeof(segment_vmaddr);
6554 }
6555
6556 offset_t size_of_filepath_cstrings = 0;
6557 for (size_t i = 0; i < modules_count; i++) {
6558 ModuleSP module_sp = modules.GetModuleAtIndex(idx: i);
6559 size_of_filepath_cstrings += module_sp->GetFileSpec().GetPath().size() + 1;
6560 }
6561
6562 // Calculate the file offsets of our "all image infos" payload in the
6563 // corefile. initial_file_offset the original value passed in to this method.
6564
6565 offset_t start_of_entries =
6566 initial_file_offset + sizeof(all_image_infos_header);
6567 offset_t start_of_seg_vmaddrs =
6568 start_of_entries + sizeof(image_entry) * modules_count;
6569 offset_t start_of_filenames = start_of_seg_vmaddrs + size_of_vmaddr_structs;
6570
6571 offset_t final_file_offset = start_of_filenames + size_of_filepath_cstrings;
6572
6573 // Now write the one-per-module 'struct image_entry' into the
6574 // StringStream; keep track of where the struct segment_vmaddr
6575 // entries for each module will end up in the corefile.
6576
6577 offset_t current_string_offset = start_of_filenames;
6578 offset_t current_segaddrs_offset = start_of_seg_vmaddrs;
6579 for (size_t i = 0; i < modules_count; i++) {
6580 ModuleSP module_sp = modules.GetModuleAtIndex(idx: i);
6581
6582 struct image_entry ent;
6583 memcpy(dest: &ent.uuid, src: module_sp->GetUUID().GetBytes().data(), n: sizeof(ent.uuid));
6584 if (modules_segment_vmaddrs[i].size() > 0) {
6585 ent.segment_count = modules_segment_vmaddrs[i].size();
6586 ent.seg_addrs_offset = current_segaddrs_offset;
6587 }
6588 ent.filepath_offset = current_string_offset;
6589 ObjectFile *objfile = module_sp->GetObjectFile();
6590 if (objfile) {
6591 Address base_addr(objfile->GetBaseAddress());
6592 if (base_addr.IsValid()) {
6593 ent.load_address = base_addr.GetLoadAddress(target: &target);
6594 }
6595 }
6596
6597 all_image_infos_payload.PutHex64(uvalue: ent.filepath_offset);
6598 all_image_infos_payload.PutRawBytes(s: ent.uuid, src_len: sizeof(ent.uuid));
6599 all_image_infos_payload.PutHex64(uvalue: ent.load_address);
6600 all_image_infos_payload.PutHex64(uvalue: ent.seg_addrs_offset);
6601 all_image_infos_payload.PutHex32(uvalue: ent.segment_count);
6602
6603 if (executing_uuids.find(x: module_sp->GetUUID().GetAsString()) !=
6604 executing_uuids.end())
6605 all_image_infos_payload.PutHex32(uvalue: 1);
6606 else
6607 all_image_infos_payload.PutHex32(uvalue: 0);
6608
6609 current_segaddrs_offset += ent.segment_count * sizeof(segment_vmaddr);
6610 current_string_offset += module_sp->GetFileSpec().GetPath().size() + 1;
6611 }
6612
6613 // Now write the struct segment_vmaddr entries into the StringStream.
6614
6615 for (size_t i = 0; i < modules_segment_vmaddrs.size(); i++) {
6616 if (modules_segment_vmaddrs[i].size() == 0)
6617 continue;
6618 for (struct segment_vmaddr segvm : modules_segment_vmaddrs[i]) {
6619 all_image_infos_payload.PutRawBytes(s: segvm.segname, src_len: sizeof(segvm.segname));
6620 all_image_infos_payload.PutHex64(uvalue: segvm.vmaddr);
6621 all_image_infos_payload.PutHex64(uvalue: segvm.unused);
6622 }
6623 }
6624
6625 for (size_t i = 0; i < modules_count; i++) {
6626 ModuleSP module_sp = modules.GetModuleAtIndex(idx: i);
6627 std::string filepath = module_sp->GetFileSpec().GetPath();
6628 all_image_infos_payload.PutRawBytes(s: filepath.data(), src_len: filepath.size() + 1);
6629 }
6630
6631 return final_file_offset;
6632}
6633
6634// Temp struct used to combine contiguous memory regions with
6635// identical permissions.
6636struct page_object {
6637 addr_t addr;
6638 addr_t size;
6639 uint32_t prot;
6640};
6641
6642bool ObjectFileMachO::SaveCore(const lldb::ProcessSP &process_sp,
6643 lldb_private::SaveCoreOptions &options,
6644 Status &error) {
6645 // The FileSpec and Process are already checked in PluginManager::SaveCore.
6646 assert(options.GetOutputFile().has_value());
6647 assert(process_sp);
6648 const FileSpec outfile = options.GetOutputFile().value();
6649
6650 // MachO defaults to dirty pages
6651 if (options.GetStyle() == SaveCoreStyle::eSaveCoreUnspecified)
6652 options.SetStyle(eSaveCoreDirtyOnly);
6653
6654 Target &target = process_sp->GetTarget();
6655 const ArchSpec target_arch = target.GetArchitecture();
6656 const llvm::Triple &target_triple = target_arch.GetTriple();
6657 if (target_triple.getVendor() == llvm::Triple::Apple &&
6658 (target_triple.getOS() == llvm::Triple::MacOSX ||
6659 target_triple.getOS() == llvm::Triple::IOS ||
6660 target_triple.getOS() == llvm::Triple::WatchOS ||
6661 target_triple.getOS() == llvm::Triple::TvOS ||
6662 target_triple.getOS() == llvm::Triple::BridgeOS ||
6663 target_triple.getOS() == llvm::Triple::XROS)) {
6664 bool make_core = false;
6665 switch (target_arch.GetMachine()) {
6666 case llvm::Triple::aarch64:
6667 case llvm::Triple::aarch64_32:
6668 case llvm::Triple::arm:
6669 case llvm::Triple::thumb:
6670 case llvm::Triple::x86:
6671 case llvm::Triple::x86_64:
6672 make_core = true;
6673 break;
6674 default:
6675 error = Status::FromErrorStringWithFormat(
6676 format: "unsupported core architecture: %s", target_triple.str().c_str());
6677 break;
6678 }
6679
6680 if (make_core) {
6681 CoreFileMemoryRanges core_ranges;
6682 error = process_sp->CalculateCoreFileSaveRanges(core_options: options, ranges&: core_ranges);
6683 if (error.Success()) {
6684 const uint32_t addr_byte_size = target_arch.GetAddressByteSize();
6685 const ByteOrder byte_order = target_arch.GetByteOrder();
6686 std::vector<llvm::MachO::segment_command_64> segment_load_commands;
6687 for (const auto &core_range_info : core_ranges) {
6688 // TODO: Refactor RangeDataVector to have a data iterator.
6689 const auto &core_range = core_range_info.data;
6690 uint32_t cmd_type = LC_SEGMENT_64;
6691 uint32_t segment_size = sizeof(llvm::MachO::segment_command_64);
6692 if (addr_byte_size == 4) {
6693 cmd_type = LC_SEGMENT;
6694 segment_size = sizeof(llvm::MachO::segment_command);
6695 }
6696 // Skip any ranges with no read/write/execute permissions and empty
6697 // ranges.
6698 if (core_range.lldb_permissions == 0 || core_range.range.size() == 0)
6699 continue;
6700 uint32_t vm_prot = 0;
6701 if (core_range.lldb_permissions & ePermissionsReadable)
6702 vm_prot |= VM_PROT_READ;
6703 if (core_range.lldb_permissions & ePermissionsWritable)
6704 vm_prot |= VM_PROT_WRITE;
6705 if (core_range.lldb_permissions & ePermissionsExecutable)
6706 vm_prot |= VM_PROT_EXECUTE;
6707 const addr_t vm_addr = core_range.range.start();
6708 const addr_t vm_size = core_range.range.size();
6709 llvm::MachO::segment_command_64 segment = {
6710 .cmd: cmd_type, // uint32_t cmd;
6711 .cmdsize: segment_size, // uint32_t cmdsize;
6712 .segname: {0}, // char segname[16];
6713 .vmaddr: vm_addr, // uint64_t vmaddr; // uint32_t for 32-bit Mach-O
6714 .vmsize: vm_size, // uint64_t vmsize; // uint32_t for 32-bit Mach-O
6715 .fileoff: 0, // uint64_t fileoff; // uint32_t for 32-bit Mach-O
6716 .filesize: vm_size, // uint64_t filesize; // uint32_t for 32-bit Mach-O
6717 .maxprot: vm_prot, // uint32_t maxprot;
6718 .initprot: vm_prot, // uint32_t initprot;
6719 .nsects: 0, // uint32_t nsects;
6720 .flags: 0}; // uint32_t flags;
6721 segment_load_commands.push_back(x: segment);
6722 }
6723
6724 StreamString buffer(Stream::eBinary, addr_byte_size, byte_order);
6725
6726 llvm::MachO::mach_header_64 mach_header;
6727 mach_header.magic = addr_byte_size == 8 ? MH_MAGIC_64 : MH_MAGIC;
6728 mach_header.cputype = target_arch.GetMachOCPUType();
6729 mach_header.cpusubtype = target_arch.GetMachOCPUSubType();
6730 mach_header.filetype = MH_CORE;
6731 mach_header.ncmds = segment_load_commands.size();
6732 mach_header.flags = 0;
6733 mach_header.reserved = 0;
6734 ThreadList &thread_list = process_sp->GetThreadList();
6735 const uint32_t num_threads = thread_list.GetSize();
6736
6737 // Make an array of LC_THREAD data items. Each one contains the
6738 // contents of the LC_THREAD load command. The data doesn't contain
6739 // the load command + load command size, we will add the load command
6740 // and load command size as we emit the data.
6741 std::vector<StreamString> LC_THREAD_datas(num_threads);
6742 for (auto &LC_THREAD_data : LC_THREAD_datas) {
6743 LC_THREAD_data.GetFlags().Set(Stream::eBinary);
6744 LC_THREAD_data.SetAddressByteSize(addr_byte_size);
6745 LC_THREAD_data.SetByteOrder(byte_order);
6746 }
6747 for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
6748 ThreadSP thread_sp(thread_list.GetThreadAtIndex(idx: thread_idx));
6749 if (thread_sp) {
6750 switch (mach_header.cputype) {
6751 case llvm::MachO::CPU_TYPE_ARM64:
6752 case llvm::MachO::CPU_TYPE_ARM64_32:
6753 RegisterContextDarwin_arm64_Mach::Create_LC_THREAD(
6754 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6755 break;
6756
6757 case llvm::MachO::CPU_TYPE_ARM:
6758 RegisterContextDarwin_arm_Mach::Create_LC_THREAD(
6759 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6760 break;
6761
6762 case llvm::MachO::CPU_TYPE_I386:
6763 RegisterContextDarwin_i386_Mach::Create_LC_THREAD(
6764 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6765 break;
6766
6767 case llvm::MachO::CPU_TYPE_X86_64:
6768 RegisterContextDarwin_x86_64_Mach::Create_LC_THREAD(
6769 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6770 break;
6771
6772 case llvm::MachO::CPU_TYPE_RISCV:
6773 RegisterContextDarwin_riscv32_Mach::Create_LC_THREAD(
6774 thread: thread_sp.get(), data&: LC_THREAD_datas[thread_idx]);
6775 break;
6776 }
6777 }
6778 }
6779
6780 // The size of the load command is the size of the segments...
6781 if (addr_byte_size == 8) {
6782 mach_header.sizeofcmds = segment_load_commands.size() *
6783 sizeof(llvm::MachO::segment_command_64);
6784 } else {
6785 mach_header.sizeofcmds = segment_load_commands.size() *
6786 sizeof(llvm::MachO::segment_command);
6787 }
6788
6789 // and the size of all LC_THREAD load command
6790 for (const auto &LC_THREAD_data : LC_THREAD_datas) {
6791 ++mach_header.ncmds;
6792 mach_header.sizeofcmds += 8 + LC_THREAD_data.GetSize();
6793 }
6794
6795 // Bits will be set to indicate which bits are NOT used in
6796 // addressing in this process or 0 for unknown.
6797 uint64_t address_mask = process_sp->GetCodeAddressMask();
6798 if (address_mask != LLDB_INVALID_ADDRESS_MASK) {
6799 // LC_NOTE "addrable bits"
6800 mach_header.ncmds++;
6801 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6802 }
6803
6804 // LC_NOTE "process metadata"
6805 mach_header.ncmds++;
6806 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6807
6808 // LC_NOTE "all image infos"
6809 mach_header.ncmds++;
6810 mach_header.sizeofcmds += sizeof(llvm::MachO::note_command);
6811
6812 // Write the mach header
6813 buffer.PutHex32(uvalue: mach_header.magic);
6814 buffer.PutHex32(uvalue: mach_header.cputype);
6815 buffer.PutHex32(uvalue: mach_header.cpusubtype);
6816 buffer.PutHex32(uvalue: mach_header.filetype);
6817 buffer.PutHex32(uvalue: mach_header.ncmds);
6818 buffer.PutHex32(uvalue: mach_header.sizeofcmds);
6819 buffer.PutHex32(uvalue: mach_header.flags);
6820 if (addr_byte_size == 8) {
6821 buffer.PutHex32(uvalue: mach_header.reserved);
6822 }
6823
6824 // Skip the mach header and all load commands and align to the next
6825 // 0x1000 byte boundary
6826 addr_t file_offset = buffer.GetSize() + mach_header.sizeofcmds;
6827
6828 file_offset = llvm::alignTo(Value: file_offset, Align: 16);
6829 std::vector<std::unique_ptr<LCNoteEntry>> lc_notes;
6830
6831 // Add "addrable bits" LC_NOTE when an address mask is available
6832 if (address_mask != LLDB_INVALID_ADDRESS_MASK) {
6833 std::unique_ptr<LCNoteEntry> addrable_bits_lcnote_up(
6834 new LCNoteEntry(addr_byte_size, byte_order));
6835 addrable_bits_lcnote_up->name = "addrable bits";
6836 addrable_bits_lcnote_up->payload_file_offset = file_offset;
6837 int bits = std::bitset<64>(~address_mask).count();
6838 addrable_bits_lcnote_up->payload.PutHex32(uvalue: 4); // version
6839 addrable_bits_lcnote_up->payload.PutHex32(
6840 uvalue: bits); // # of bits used for low addresses
6841 addrable_bits_lcnote_up->payload.PutHex32(
6842 uvalue: bits); // # of bits used for high addresses
6843 addrable_bits_lcnote_up->payload.PutHex32(uvalue: 0); // reserved
6844
6845 file_offset += addrable_bits_lcnote_up->payload.GetSize();
6846
6847 lc_notes.push_back(x: std::move(addrable_bits_lcnote_up));
6848 }
6849
6850 // Add "process metadata" LC_NOTE
6851 std::unique_ptr<LCNoteEntry> thread_extrainfo_lcnote_up(
6852 new LCNoteEntry(addr_byte_size, byte_order));
6853 thread_extrainfo_lcnote_up->name = "process metadata";
6854 thread_extrainfo_lcnote_up->payload_file_offset = file_offset;
6855
6856 StructuredData::DictionarySP dict(
6857 std::make_shared<StructuredData::Dictionary>());
6858 StructuredData::ArraySP threads(
6859 std::make_shared<StructuredData::Array>());
6860 for (const ThreadSP &thread_sp :
6861 process_sp->CalculateCoreFileThreadList(core_options: options)) {
6862 StructuredData::DictionarySP thread(
6863 std::make_shared<StructuredData::Dictionary>());
6864 thread->AddIntegerItem(key: "thread_id", value: thread_sp->GetID());
6865 threads->AddItem(item: thread);
6866 }
6867 dict->AddItem(key: "threads", value_sp: threads);
6868 StreamString strm;
6869 dict->Dump(s&: strm, /* pretty */ pretty_print: false);
6870 thread_extrainfo_lcnote_up->payload.PutRawBytes(s: strm.GetData(),
6871 src_len: strm.GetSize());
6872
6873 file_offset += thread_extrainfo_lcnote_up->payload.GetSize();
6874 file_offset = llvm::alignTo(Value: file_offset, Align: 16);
6875 lc_notes.push_back(x: std::move(thread_extrainfo_lcnote_up));
6876
6877 // Add "all image infos" LC_NOTE
6878 std::unique_ptr<LCNoteEntry> all_image_infos_lcnote_up(
6879 new LCNoteEntry(addr_byte_size, byte_order));
6880 all_image_infos_lcnote_up->name = "all image infos";
6881 all_image_infos_lcnote_up->payload_file_offset = file_offset;
6882 file_offset = CreateAllImageInfosPayload(
6883 process_sp, initial_file_offset: file_offset, all_image_infos_payload&: all_image_infos_lcnote_up->payload,
6884 options);
6885 lc_notes.push_back(x: std::move(all_image_infos_lcnote_up));
6886
6887 // Add LC_NOTE load commands
6888 for (auto &lcnote : lc_notes) {
6889 // Add the LC_NOTE load command to the file.
6890 buffer.PutHex32(uvalue: LC_NOTE);
6891 buffer.PutHex32(uvalue: sizeof(llvm::MachO::note_command));
6892 char namebuf[16];
6893 memset(s: namebuf, c: 0, n: sizeof(namebuf));
6894 // This is the uncommon case where strncpy is exactly
6895 // the right one, doesn't need to be nul terminated.
6896 // LC_NOTE name field is char[16] and is not guaranteed to be
6897 // nul-terminated.
6898 // coverity[buffer_size_warning]
6899 strncpy(dest: namebuf, src: lcnote->name.c_str(), n: sizeof(namebuf));
6900 buffer.PutRawBytes(s: namebuf, src_len: sizeof(namebuf));
6901 buffer.PutHex64(uvalue: lcnote->payload_file_offset);
6902 buffer.PutHex64(uvalue: lcnote->payload.GetSize());
6903 }
6904
6905 // Align to 4096-byte page boundary for the LC_SEGMENTs.
6906 file_offset = llvm::alignTo(Value: file_offset, Align: 4096);
6907
6908 for (auto &segment : segment_load_commands) {
6909 segment.fileoff = file_offset;
6910 file_offset += segment.filesize;
6911 }
6912
6913 // Write out all of the LC_THREAD load commands
6914 for (const auto &LC_THREAD_data : LC_THREAD_datas) {
6915 const size_t LC_THREAD_data_size = LC_THREAD_data.GetSize();
6916 buffer.PutHex32(uvalue: LC_THREAD);
6917 buffer.PutHex32(uvalue: 8 + LC_THREAD_data_size); // cmd + cmdsize + data
6918 buffer.Write(src: LC_THREAD_data.GetString().data(), src_len: LC_THREAD_data_size);
6919 }
6920
6921 // Write out all of the segment load commands
6922 for (const auto &segment : segment_load_commands) {
6923 buffer.PutHex32(uvalue: segment.cmd);
6924 buffer.PutHex32(uvalue: segment.cmdsize);
6925 buffer.PutRawBytes(s: segment.segname, src_len: sizeof(segment.segname));
6926 if (addr_byte_size == 8) {
6927 buffer.PutHex64(uvalue: segment.vmaddr);
6928 buffer.PutHex64(uvalue: segment.vmsize);
6929 buffer.PutHex64(uvalue: segment.fileoff);
6930 buffer.PutHex64(uvalue: segment.filesize);
6931 } else {
6932 buffer.PutHex32(uvalue: static_cast<uint32_t>(segment.vmaddr));
6933 buffer.PutHex32(uvalue: static_cast<uint32_t>(segment.vmsize));
6934 buffer.PutHex32(uvalue: static_cast<uint32_t>(segment.fileoff));
6935 buffer.PutHex32(uvalue: static_cast<uint32_t>(segment.filesize));
6936 }
6937 buffer.PutHex32(uvalue: segment.maxprot);
6938 buffer.PutHex32(uvalue: segment.initprot);
6939 buffer.PutHex32(uvalue: segment.nsects);
6940 buffer.PutHex32(uvalue: segment.flags);
6941 }
6942
6943 std::string core_file_path(outfile.GetPath());
6944 auto core_file = FileSystem::Instance().Open(
6945 file_spec: outfile, options: File::eOpenOptionWriteOnly | File::eOpenOptionTruncate |
6946 File::eOpenOptionCanCreate);
6947 if (!core_file) {
6948 error = Status::FromError(error: core_file.takeError());
6949 } else {
6950 // Read 1 page at a time
6951 uint8_t bytes[0x1000];
6952 // Write the mach header and load commands out to the core file
6953 size_t bytes_written = buffer.GetString().size();
6954 error =
6955 core_file.get()->Write(buf: buffer.GetString().data(), num_bytes&: bytes_written);
6956 if (error.Success()) {
6957
6958 for (auto &lcnote : lc_notes) {
6959 if (core_file.get()->SeekFromStart(offset: lcnote->payload_file_offset) ==
6960 -1) {
6961 error = Status::FromErrorStringWithFormat(
6962 format: "Unable to seek to corefile pos "
6963 "to write '%s' LC_NOTE payload",
6964 lcnote->name.c_str());
6965 return false;
6966 }
6967 bytes_written = lcnote->payload.GetSize();
6968 error = core_file.get()->Write(buf: lcnote->payload.GetData(),
6969 num_bytes&: bytes_written);
6970 if (!error.Success())
6971 return false;
6972 }
6973
6974 // Now write the file data for all memory segments in the process
6975 for (const auto &segment : segment_load_commands) {
6976 if (core_file.get()->SeekFromStart(offset: segment.fileoff) == -1) {
6977 error = Status::FromErrorStringWithFormat(
6978 format: "unable to seek to offset 0x%" PRIx64 " in '%s'",
6979 segment.fileoff, core_file_path.c_str());
6980 break;
6981 }
6982
6983 target.GetDebugger().GetAsyncOutputStream()->Printf(
6984 format: "Saving %" PRId64
6985 " bytes of data for memory region at 0x%" PRIx64 "\n",
6986 segment.vmsize, segment.vmaddr);
6987 addr_t bytes_left = segment.vmsize;
6988 addr_t addr = segment.vmaddr;
6989 Status memory_read_error;
6990 while (bytes_left > 0 && error.Success()) {
6991 const size_t bytes_to_read =
6992 bytes_left > sizeof(bytes) ? sizeof(bytes) : bytes_left;
6993
6994 // In a savecore setting, we don't really care about caching,
6995 // as the data is dumped and very likely never read again,
6996 // so we call ReadMemoryFromInferior to bypass it.
6997 const size_t bytes_read = process_sp->ReadMemoryFromInferior(
6998 vm_addr: addr, buf: bytes, size: bytes_to_read, error&: memory_read_error);
6999
7000 if (bytes_read == bytes_to_read) {
7001 size_t bytes_written = bytes_read;
7002 error = core_file.get()->Write(buf: bytes, num_bytes&: bytes_written);
7003 bytes_left -= bytes_read;
7004 addr += bytes_read;
7005 } else {
7006 // Some pages within regions are not readable, those should
7007 // be zero filled
7008 memset(s: bytes, c: 0, n: bytes_to_read);
7009 size_t bytes_written = bytes_to_read;
7010 error = core_file.get()->Write(buf: bytes, num_bytes&: bytes_written);
7011 bytes_left -= bytes_to_read;
7012 addr += bytes_to_read;
7013 }
7014 }
7015 }
7016 }
7017 }
7018 }
7019 }
7020 return true; // This is the right plug to handle saving core files for
7021 // this process
7022 }
7023 return false;
7024}
7025
7026ObjectFileMachO::MachOCorefileAllImageInfos
7027ObjectFileMachO::GetCorefileAllImageInfos() {
7028 MachOCorefileAllImageInfos image_infos;
7029 Log *log(GetLog(mask: LLDBLog::Object | LLDBLog::Symbols | LLDBLog::Process |
7030 LLDBLog::DynamicLoader));
7031
7032 auto lc_notes = FindLC_NOTEByName(name: "all image infos");
7033 for (auto lc_note : lc_notes) {
7034 offset_t payload_offset = std::get<0>(t&: lc_note);
7035 // Read the struct all_image_infos_header.
7036 uint32_t version = m_data.GetU32(offset_ptr: &payload_offset);
7037 if (version != 1) {
7038 return image_infos;
7039 }
7040 uint32_t imgcount = m_data.GetU32(offset_ptr: &payload_offset);
7041 uint64_t entries_fileoff = m_data.GetU64(offset_ptr: &payload_offset);
7042 // 'entries_size' is not used, nor is the 'unused' entry.
7043 // offset += 4; // uint32_t entries_size;
7044 // offset += 4; // uint32_t unused;
7045
7046 LLDB_LOGF(log, "LC_NOTE 'all image infos' found version %d with %d images",
7047 version, imgcount);
7048 payload_offset = entries_fileoff;
7049 for (uint32_t i = 0; i < imgcount; i++) {
7050 // Read the struct image_entry.
7051 offset_t filepath_offset = m_data.GetU64(offset_ptr: &payload_offset);
7052 uuid_t uuid;
7053 memcpy(dest: &uuid, src: m_data.GetData(offset_ptr: &payload_offset, length: sizeof(uuid_t)),
7054 n: sizeof(uuid_t));
7055 uint64_t load_address = m_data.GetU64(offset_ptr: &payload_offset);
7056 offset_t seg_addrs_offset = m_data.GetU64(offset_ptr: &payload_offset);
7057 uint32_t segment_count = m_data.GetU32(offset_ptr: &payload_offset);
7058 uint32_t currently_executing = m_data.GetU32(offset_ptr: &payload_offset);
7059
7060 MachOCorefileImageEntry image_entry;
7061 image_entry.filename = (const char *)m_data.GetCStr(offset_ptr: &filepath_offset);
7062 image_entry.uuid = UUID(uuid, sizeof(uuid_t));
7063 image_entry.load_address = load_address;
7064 image_entry.currently_executing = currently_executing;
7065
7066 offset_t seg_vmaddrs_offset = seg_addrs_offset;
7067 for (uint32_t j = 0; j < segment_count; j++) {
7068 char segname[17];
7069 m_data.CopyData(offset: seg_vmaddrs_offset, length: 16, dst: segname);
7070 segname[16] = '\0';
7071 seg_vmaddrs_offset += 16;
7072 uint64_t vmaddr = m_data.GetU64(offset_ptr: &seg_vmaddrs_offset);
7073 seg_vmaddrs_offset += 8; /* unused */
7074
7075 std::tuple<ConstString, addr_t> new_seg{ConstString(segname), vmaddr};
7076 image_entry.segment_load_addresses.push_back(x: new_seg);
7077 }
7078 LLDB_LOGF(log, " image entry: %s %s 0x%" PRIx64 " %s",
7079 image_entry.filename.c_str(),
7080 image_entry.uuid.GetAsString().c_str(),
7081 image_entry.load_address,
7082 image_entry.currently_executing ? "currently executing"
7083 : "not currently executing");
7084 image_infos.all_image_infos.push_back(x: image_entry);
7085 }
7086 }
7087
7088 lc_notes = FindLC_NOTEByName(name: "load binary");
7089 for (auto lc_note : lc_notes) {
7090 offset_t payload_offset = std::get<0>(t&: lc_note);
7091 uint32_t version = m_data.GetU32(offset_ptr: &payload_offset);
7092 if (version == 1) {
7093 uuid_t uuid;
7094 memcpy(dest: &uuid, src: m_data.GetData(offset_ptr: &payload_offset, length: sizeof(uuid_t)),
7095 n: sizeof(uuid_t));
7096 uint64_t load_address = m_data.GetU64(offset_ptr: &payload_offset);
7097 uint64_t slide = m_data.GetU64(offset_ptr: &payload_offset);
7098 std::string filename = m_data.GetCStr(offset_ptr: &payload_offset);
7099
7100 MachOCorefileImageEntry image_entry;
7101 image_entry.filename = filename;
7102 image_entry.uuid = UUID(uuid, sizeof(uuid_t));
7103 image_entry.load_address = load_address;
7104 image_entry.slide = slide;
7105 image_entry.currently_executing = true;
7106 image_infos.all_image_infos.push_back(x: image_entry);
7107 LLDB_LOGF(log,
7108 "LC_NOTE 'load binary' found, filename %s uuid %s load "
7109 "address 0x%" PRIx64 " slide 0x%" PRIx64,
7110 filename.c_str(),
7111 image_entry.uuid.IsValid()
7112 ? image_entry.uuid.GetAsString().c_str()
7113 : "00000000-0000-0000-0000-000000000000",
7114 load_address, slide);
7115 }
7116 }
7117
7118 return image_infos;
7119}
7120
7121bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) {
7122 MachOCorefileAllImageInfos image_infos = GetCorefileAllImageInfos();
7123 Log *log = GetLog(mask: LLDBLog::Object | LLDBLog::DynamicLoader);
7124 Status error;
7125
7126 bool found_platform_binary = false;
7127 ModuleList added_modules;
7128 for (MachOCorefileImageEntry &image : image_infos.all_image_infos) {
7129 ModuleSP module_sp, local_filesystem_module_sp;
7130
7131 // If this is a platform binary, it has been loaded (or registered with
7132 // the DynamicLoader to be loaded), we don't need to do any further
7133 // processing. We're not going to call ModulesDidLoad on this in this
7134 // method, so notify==true.
7135 if (process.GetTarget()
7136 .GetDebugger()
7137 .GetPlatformList()
7138 .LoadPlatformBinaryAndSetup(process: &process, addr: image.load_address,
7139 notify: true /* notify */)) {
7140 LLDB_LOGF(log,
7141 "ObjectFileMachO::%s binary at 0x%" PRIx64
7142 " is a platform binary, has been handled by a Platform plugin.",
7143 __FUNCTION__, image.load_address);
7144 continue;
7145 }
7146
7147 bool value_is_offset = image.load_address == LLDB_INVALID_ADDRESS;
7148 uint64_t value = value_is_offset ? image.slide : image.load_address;
7149 if (value_is_offset && value == LLDB_INVALID_ADDRESS) {
7150 // We have neither address nor slide; so we will find the binary
7151 // by UUID and load it at slide/offset 0.
7152 value = 0;
7153 }
7154
7155 // We have either a UUID, or we have a load address which
7156 // and can try to read load commands and find a UUID.
7157 if (image.uuid.IsValid() ||
7158 (!value_is_offset && value != LLDB_INVALID_ADDRESS)) {
7159 const bool set_load_address = image.segment_load_addresses.size() == 0;
7160 const bool notify = false;
7161 // Userland Darwin binaries will have segment load addresses via
7162 // the `all image infos` LC_NOTE.
7163 const bool allow_memory_image_last_resort =
7164 image.segment_load_addresses.size();
7165 module_sp = DynamicLoader::LoadBinaryWithUUIDAndAddress(
7166 process: &process, name: image.filename, uuid: image.uuid, value, value_is_offset,
7167 force_symbol_search: image.currently_executing, notify, set_address_in_target: set_load_address,
7168 allow_memory_image_last_resort);
7169 }
7170
7171 // We have a ModuleSP to load in the Target. Load it at the
7172 // correct address/slide and notify/load scripting resources.
7173 if (module_sp) {
7174 added_modules.Append(module_sp, notify: false /* notify */);
7175
7176 // We have a list of segment load address
7177 if (image.segment_load_addresses.size() > 0) {
7178 if (log) {
7179 std::string uuidstr = image.uuid.GetAsString();
7180 log->Printf(format: "ObjectFileMachO::LoadCoreFileImages adding binary '%s' "
7181 "UUID %s with section load addresses",
7182 module_sp->GetFileSpec().GetPath().c_str(),
7183 uuidstr.c_str());
7184 }
7185 for (auto name_vmaddr_tuple : image.segment_load_addresses) {
7186 SectionList *sectlist = module_sp->GetObjectFile()->GetSectionList();
7187 if (sectlist) {
7188 SectionSP sect_sp =
7189 sectlist->FindSectionByName(section_dstr: std::get<0>(t&: name_vmaddr_tuple));
7190 if (sect_sp) {
7191 process.GetTarget().SetSectionLoadAddress(
7192 section: sect_sp, load_addr: std::get<1>(t&: name_vmaddr_tuple));
7193 }
7194 }
7195 }
7196 } else {
7197 if (log) {
7198 std::string uuidstr = image.uuid.GetAsString();
7199 log->Printf(format: "ObjectFileMachO::LoadCoreFileImages adding binary '%s' "
7200 "UUID %s with %s 0x%" PRIx64,
7201 module_sp->GetFileSpec().GetPath().c_str(),
7202 uuidstr.c_str(),
7203 value_is_offset ? "slide" : "load address", value);
7204 }
7205 bool changed;
7206 module_sp->SetLoadAddress(target&: process.GetTarget(), value, value_is_offset,
7207 changed);
7208 }
7209 }
7210 }
7211 if (added_modules.GetSize() > 0) {
7212 process.GetTarget().ModulesDidLoad(module_list&: added_modules);
7213 process.Flush();
7214 return true;
7215 }
7216 // Return true if the only binary we found was the platform binary,
7217 // and it was loaded outside the scope of this method.
7218 if (found_platform_binary)
7219 return true;
7220
7221 // No binaries.
7222 return false;
7223}
7224

source code of lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp