//===-- sanitizer_procmaps_mac.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_APPLE
#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
#ifndef CPU_SUBTYPE_X86_64_H
#define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */
#endif
#ifndef CPU_SUBTYPE_ARM_V7S
#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */
#endif
#ifndef CPU_SUBTYPE_ARM_V7K
#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12)
#endif
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];
  uptr nsects;
  const char *current_load_cmd_addr;
  u32 lc_type;
  uptr base_virt_addr;
  uptr addr_mask;
};

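// Reads the section load command at |data->current_load_cmd_addr|, advances
// that pointer past the command, and registers the section's (slid) address
// range with |module|.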
template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

bool MemoryMappingLayout::Error() const {
  return false;
}

// More information about Mach-O headers can be found in mach-o/loader.h
// Each Mach-O image has a header (mach_header or mach_header_64) starting with
// a magic number, and a list of linker load commands directly following the
// header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes. We're interested only in segment load commands
// (LC_SEGMENT and LC_SEGMENT_64), which indicate that a part of the file is
// mapped into the task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and the
// file offset of the current memory segment.
// Because these fields are taken from the images as is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
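//
// For illustration only, a sketch of the computation NextSegmentLoad() below
// performs for a regular (non-dyld) image, where |i| is the image index and
// |sc| its segment load command:
//   uptr slide = (uptr)_dyld_get_image_vmaddr_slide(i);
//   uptr seg_start = sc->vmaddr + slide;    // runtime start of the segment
//   uptr seg_end = seg_start + sc->vmsize;  // runtime end of the segment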

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = 0;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

static bool IsDyldHdr(const mach_header *hdr) {
  return (hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
         hdr->filetype == MH_DYLINKER;
}

// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually recursing through the memory map
// until we hit a Mach header matching dyld instead. These
// vm_region_recurse_64() calls are expensive, but the first memory map
// generation occurs early in the process, when dyld is one of the only
// images loaded, so it will be hit after only a few iterations. These
// assumptions no longer hold on macOS 13+ (dyld itself has moved into the
// shared cache).
static mach_header *GetDyldImageHeaderViaVMRegion() {
  vm_address_t address = 0;

  while (true) {
    vm_size_t size = 0;
    unsigned depth = 1;
    struct vm_region_submap_info_64 info;
    mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
    kern_return_t err =
        vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                             (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if (IsDyldHdr(hdr)) {
        return hdr;
      }
    }
    address += size;
  }
}

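// Interfaces for inspecting the dyld shared cache, declared manually here
// because they may not be available in the SDK headers this file includes.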
extern "C" {
struct dyld_shared_cache_dylib_text_info {
  uint64_t version;  // current version 2
  // following fields all exist in version 1
  uint64_t loadAddressUnslid;
  uint64_t textSegmentSize;
  uuid_t dylibUuid;
  const char *path;  // pointer invalid at end of iterations
  // following fields all exist in version 2
  uint64_t textSegmentOffset;  // offset from start of cache
};
typedef struct dyld_shared_cache_dylib_text_info
    dyld_shared_cache_dylib_text_info;

extern bool _dyld_get_shared_cache_uuid(uuid_t uuid);
extern const void *_dyld_get_shared_cache_range(size_t *length);
extern int dyld_shared_cache_iterate_text(
    const uuid_t cacheUuid,
    void (^callback)(const dyld_shared_cache_dylib_text_info *info));
}  // extern "C"

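// Looks for dyld's Mach header inside the dyld shared cache by iterating over
// the text segments of the dylibs in the cache and testing each candidate
// header with IsDyldHdr().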
static mach_header *GetDyldImageHeaderViaSharedCache() {
  uuid_t uuid;
  bool hasCache = _dyld_get_shared_cache_uuid(uuid);
  if (!hasCache)
    return nullptr;

  size_t cacheLength;
  __block uptr cacheStart = (uptr)_dyld_get_shared_cache_range(&cacheLength);
  CHECK(cacheStart && cacheLength);

  __block mach_header *dyldHdr = nullptr;
  int res = dyld_shared_cache_iterate_text(
      uuid, ^(const dyld_shared_cache_dylib_text_info *info) {
        CHECK_GE(info->version, 2);
        mach_header *hdr =
            (mach_header *)(cacheStart + info->textSegmentOffset);
        if (IsDyldHdr(hdr))
          dyldHdr = hdr;
      });
  CHECK_EQ(res, 0);

  return dyldHdr;
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) {
    // On macOS 13+, dyld itself has moved into the shared cache. Looking it up
    // via vm_region_recurse_64() causes spins/hangs/crashes.
    if (GetMacosAlignedVersion() >= MacosVersion(13, 0)) {
      dyld_hdr = GetDyldImageHeaderViaSharedCache();
      if (!dyld_hdr) {
        VReport(1,
                "Failed to look up the dyld image header in the shared cache "
                "on macOS 13+ (or no shared cache in use). Falling back to "
                "lookup via vm_region_recurse_64().\n");
        dyld_hdr = GetDyldImageHeaderViaVMRegion();
      }
    } else {
      dyld_hdr = GetDyldImageHeaderViaVMRegion();
    }
    CHECK(dyld_hdr);
  }

  return dyld_hdr;
}

// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and returns the start and end addresses and file offset of the corresponding
// segment.
// Note that the segment addresses are not necessarily sorted.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;

  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  layout_data->current_load_cmd_count--;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand *sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data->current_image == kDyldImageIdx) {
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // To make matters even more complicated, this absolute address
      // isn't actually the absolute segment address, but the offset portion
      // of the address is accurate when combined with the dyld base address,
      // and the mask will give just this offset.
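      // For illustration (hypothetical values): if such a segment had
      // vmaddr 0x8fe3d000, masking with 0xfffff yields the offset 0x3d000,
      // which, added to the runtime dyld header address, gives the segment's
      // actual start address.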
      addr_mask = 0xfffff;
    } else {
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset =
        (layout_data->current_filetype == /*MH_EXECUTE*/ 0x2) ? sc->vmaddr
                                                              : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}

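// Translates a Mach-O (cputype, cpusubtype) pair into the corresponding
// ModuleArch value. The capability bits of the subtype are ignored.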
ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}

static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

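// Copies the image's UUID from its LC_UUID load command into |uuid_output|.
// If the image has no LC_UUID command, |uuid_output| is left unchanged.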
static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_UUID) continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

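// A module is considered instrumented if it links against a sanitizer
// runtime, i.e. one of its LC_LOAD_DYLIB commands names a libclang_rt.* dylib.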
static bool IsModuleInstrumented(const load_command *first_lc) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_LOAD_DYLIB) continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}

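// Returns the Mach header of the image the iterator is currently positioned
// on; the sentinel index kDyldImageIdx denotes dyld itself.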
const ImageHeader *MemoryMappingLayout::CurrentImageHeader() {
  const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                               ? get_dyld_hdr()
                               : _dyld_get_image_header(data_.current_image);
  return (const ImageHeader *)hdr;
}

bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (const mach_header *)CurrentImageHeader();
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr);
    }

    while (data_.current_load_cmd_count > 0) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }
    // If we get here, no more load_cmd's in this image talk about
    // segments. Go on to the next image.
    data_.current_load_cmd_count = -1;  // This will trigger loading next image
  }
  return false;
}

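// Builds the list of loaded modules by walking every memory-mapped segment and
// grouping consecutive segments that share a filename into one LoadedModule.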
void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalMmapVector<char> module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), module_name.size());
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    if (segment.filename[0] == '\0') continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}

}  // namespace __sanitizer

#endif  // SANITIZER_APPLE