1 | //===-- sanitizer_symbolizer_posix_libcdep.cpp ----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file is shared between AddressSanitizer and ThreadSanitizer |
10 | // run-time libraries. |
11 | // POSIX-specific implementation of symbolizer parts. |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "sanitizer_platform.h" |
15 | #include "sanitizer_symbolizer_markup.h" |
16 | #if SANITIZER_POSIX |
17 | # include <dlfcn.h> // for dlsym() |
18 | # include <errno.h> |
19 | # include <stdint.h> |
20 | # include <stdlib.h> |
21 | # include <sys/wait.h> |
22 | # include <unistd.h> |
23 | |
24 | # include "sanitizer_allocator_internal.h" |
25 | # include "sanitizer_common.h" |
26 | # include "sanitizer_file.h" |
27 | # include "sanitizer_flags.h" |
28 | # include "sanitizer_internal_defs.h" |
29 | # include "sanitizer_linux.h" |
30 | # include "sanitizer_placement_new.h" |
31 | # include "sanitizer_posix.h" |
32 | # include "sanitizer_procmaps.h" |
33 | # include "sanitizer_symbolizer_internal.h" |
34 | # include "sanitizer_symbolizer_libbacktrace.h" |
35 | # include "sanitizer_symbolizer_mac.h" |
36 | |
37 | // C++ demangling function, as required by Itanium C++ ABI. This is weak, |
38 | // because we do not require a C++ ABI library to be linked to a program |
39 | // using sanitizers; if it's not present, we'll just use the mangled name. |
40 | namespace __cxxabiv1 { |
41 | extern "C" SANITIZER_WEAK_ATTRIBUTE |
42 | char *__cxa_demangle(const char *mangled, char *buffer, |
43 | size_t *length, int *status); |
44 | } |
45 | |
46 | namespace __sanitizer { |
47 | |
// Demangles `name` with __cxa_demangle from __cxxabiv1. Returns the demangled
// string, or nullptr when the demangler is not linked in or rejects the name.
const char *DemangleCXXABI(const char *name) {
  // The demangler is declared weak; a null address means it is not available.
  if (&__cxxabiv1::__cxa_demangle == nullptr)
    return nullptr;

  // FIXME: __cxa_demangle aggressively insists on allocating memory.
  // There's not much we can do about that, short of providing our
  // own demangler (libc++abi's implementation could be adapted so that
  // it does not allocate). For now, we just call it anyway, and we leak
  // the returned value.
  return __cxxabiv1::__cxa_demangle(name, /*buffer=*/nullptr,
                                    /*length=*/nullptr, /*status=*/nullptr);
}
62 | |
// The Swift runtime ships no headers yet, so the signature of its demangler
// is spelled out by hand. Once headers are available the symbol can be weakly
// linked instead, since the Swift runtime is not required to be linked in.
using swift_demangle_ft = char *(*)(const char *mangledName,
                                    size_t mangledNameLength,
                                    char *outputBuffer,
                                    size_t *outputBufferSize, uint32_t flags);

// Cached address of the Swift demangler; null until it has been looked up.
static swift_demangle_ft swift_demangle_f = nullptr;
70 | |
71 | // This must not happen lazily at symbolication time, because dlsym uses |
72 | // malloc and thread-local storage, which is not a good thing to do during |
73 | // symbolication. |
74 | static void InitializeSwiftDemangler() { |
75 | swift_demangle_f = (swift_demangle_ft)dlsym(RTLD_DEFAULT, name: "swift_demangle" ); |
76 | } |
77 | |
78 | // Attempts to demangle a Swift name. The demangler will return nullptr if a |
79 | // non-Swift name is passed in. |
80 | const char *DemangleSwift(const char *name) { |
81 | if (swift_demangle_f) |
82 | return swift_demangle_f(name, internal_strlen(s: name), 0, 0, 0); |
83 | |
84 | return nullptr; |
85 | } |
86 | |
87 | const char *DemangleSwiftAndCXX(const char *name) { |
88 | if (!name) return nullptr; |
89 | if (const char *swift_demangled_name = DemangleSwift(name)) |
90 | return swift_demangled_name; |
91 | return DemangleCXXABI(name); |
92 | } |
93 | |
94 | static bool CreateTwoHighNumberedPipes(int *infd_, int *outfd_) { |
95 | int *infd = NULL; |
96 | int *outfd = NULL; |
97 | // The client program may close its stdin and/or stdout and/or stderr |
98 | // thus allowing socketpair to reuse file descriptors 0, 1 or 2. |
99 | // In this case the communication between the forked processes may be |
100 | // broken if either the parent or the child tries to close or duplicate |
101 | // these descriptors. The loop below produces two pairs of file |
102 | // descriptors, each greater than 2 (stderr). |
103 | int sock_pair[5][2]; |
104 | for (int i = 0; i < 5; i++) { |
105 | if (pipe(pipedes: sock_pair[i]) == -1) { |
106 | for (int j = 0; j < i; j++) { |
107 | internal_close(fd: sock_pair[j][0]); |
108 | internal_close(fd: sock_pair[j][1]); |
109 | } |
110 | return false; |
111 | } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) { |
112 | if (infd == NULL) { |
113 | infd = sock_pair[i]; |
114 | } else { |
115 | outfd = sock_pair[i]; |
116 | for (int j = 0; j < i; j++) { |
117 | if (sock_pair[j] == infd) continue; |
118 | internal_close(fd: sock_pair[j][0]); |
119 | internal_close(fd: sock_pair[j][1]); |
120 | } |
121 | break; |
122 | } |
123 | } |
124 | } |
125 | CHECK(infd); |
126 | CHECK(outfd); |
127 | infd_[0] = infd[0]; |
128 | infd_[1] = infd[1]; |
129 | outfd_[0] = outfd[0]; |
130 | outfd_[1] = outfd[1]; |
131 | return true; |
132 | } |
133 | |
134 | bool SymbolizerProcess::StartSymbolizerSubprocess() { |
135 | if (!FileExists(filename: path_)) { |
136 | if (!reported_invalid_path_) { |
137 | Report(format: "WARNING: invalid path to external symbolizer!\n" ); |
138 | reported_invalid_path_ = true; |
139 | } |
140 | return false; |
141 | } |
142 | |
143 | const char *argv[kArgVMax]; |
144 | GetArgV(path_to_binary: path_, argv); |
145 | pid_t pid; |
146 | |
147 | // Report how symbolizer is being launched for debugging purposes. |
148 | if (Verbosity() >= 3) { |
149 | // Only use `Report` for first line so subsequent prints don't get prefixed |
150 | // with current PID. |
151 | Report(format: "Launching Symbolizer process: " ); |
152 | for (unsigned index = 0; index < kArgVMax && argv[index]; ++index) |
153 | Printf(format: "%s " , argv[index]); |
154 | Printf(format: "\n" ); |
155 | } |
156 | |
157 | if (use_posix_spawn_) { |
158 | #if SANITIZER_APPLE |
159 | fd_t fd = internal_spawn(argv, const_cast<const char **>(GetEnvP()), &pid); |
160 | if (fd == kInvalidFd) { |
161 | Report("WARNING: failed to spawn external symbolizer (errno: %d)\n" , |
162 | errno); |
163 | return false; |
164 | } |
165 | |
166 | input_fd_ = fd; |
167 | output_fd_ = fd; |
168 | #else // SANITIZER_APPLE |
169 | UNIMPLEMENTED(); |
170 | #endif // SANITIZER_APPLE |
171 | } else { |
172 | fd_t infd[2] = {}, outfd[2] = {}; |
173 | if (!CreateTwoHighNumberedPipes(infd_: infd, outfd_: outfd)) { |
174 | Report(format: "WARNING: Can't create a socket pair to start " |
175 | "external symbolizer (errno: %d)\n" , errno); |
176 | return false; |
177 | } |
178 | |
179 | pid = StartSubprocess(filename: path_, argv, envp: GetEnvP(), /* stdin */ stdin_fd: outfd[0], |
180 | /* stdout */ stdout_fd: infd[1]); |
181 | if (pid < 0) { |
182 | internal_close(fd: infd[0]); |
183 | internal_close(fd: outfd[1]); |
184 | return false; |
185 | } |
186 | |
187 | input_fd_ = infd[0]; |
188 | output_fd_ = outfd[1]; |
189 | } |
190 | |
191 | CHECK_GT(pid, 0); |
192 | |
193 | // Check that symbolizer subprocess started successfully. |
194 | SleepForMillis(millis: kSymbolizerStartupTimeMillis); |
195 | if (!IsProcessRunning(pid)) { |
196 | // Either waitpid failed, or child has already exited. |
197 | Report(format: "WARNING: external symbolizer didn't start up correctly!\n" ); |
198 | return false; |
199 | } |
200 | |
201 | return true; |
202 | } |
203 | |
204 | class Addr2LineProcess final : public SymbolizerProcess { |
205 | public: |
206 | Addr2LineProcess(const char *path, const char *module_name) |
207 | : SymbolizerProcess(path), module_name_(internal_strdup(s: module_name)) {} |
208 | |
209 | const char *module_name() const { return module_name_; } |
210 | |
211 | private: |
212 | void GetArgV(const char *path_to_binary, |
213 | const char *(&argv)[kArgVMax]) const override { |
214 | int i = 0; |
215 | argv[i++] = path_to_binary; |
216 | if (common_flags()->demangle) |
217 | argv[i++] = "-C" ; |
218 | if (common_flags()->symbolize_inline_frames) |
219 | argv[i++] = "-i" ; |
220 | argv[i++] = "-fe" ; |
221 | argv[i++] = module_name_; |
222 | argv[i++] = nullptr; |
223 | CHECK_LE(i, kArgVMax); |
224 | } |
225 | |
226 | bool ReachedEndOfOutput(const char *buffer, uptr length) const override; |
227 | |
228 | bool ReadFromSymbolizer() override { |
229 | if (!SymbolizerProcess::ReadFromSymbolizer()) |
230 | return false; |
231 | auto &buff = GetBuff(); |
232 | // We should cut out output_terminator_ at the end of given buffer, |
233 | // appended by addr2line to mark the end of its meaningful output. |
234 | // We cannot scan buffer from it's beginning, because it is legal for it |
235 | // to start with output_terminator_ in case given offset is invalid. So, |
236 | // scanning from second character. |
237 | char *garbage = internal_strstr(haystack: buff.data() + 1, needle: output_terminator_); |
238 | // This should never be NULL since buffer must end up with |
239 | // output_terminator_. |
240 | CHECK(garbage); |
241 | |
242 | // Trim the buffer. |
243 | uintptr_t new_size = garbage - buff.data(); |
244 | GetBuff().resize(new_size); |
245 | GetBuff().push_back(element: '\0'); |
246 | return true; |
247 | } |
248 | |
249 | const char *module_name_; // Owned, leaked. |
250 | static const char output_terminator_[]; |
251 | }; |
252 | |
253 | const char Addr2LineProcess::output_terminator_[] = "??\n??:0\n" ; |
254 | |
255 | bool Addr2LineProcess::ReachedEndOfOutput(const char *buffer, |
256 | uptr length) const { |
257 | const size_t kTerminatorLen = sizeof(output_terminator_) - 1; |
258 | // Skip, if we read just kTerminatorLen bytes, because Addr2Line output |
259 | // should consist at least of two pairs of lines: |
260 | // 1. First one, corresponding to given offset to be symbolized |
261 | // (may be equal to output_terminator_, if offset is not valid). |
262 | // 2. Second one for output_terminator_, itself to mark the end of output. |
263 | if (length <= kTerminatorLen) return false; |
264 | // Addr2Line output should end up with output_terminator_. |
265 | return !internal_memcmp(s1: buffer + length - kTerminatorLen, |
266 | s2: output_terminator_, n: kTerminatorLen); |
267 | } |
268 | |
269 | class Addr2LinePool final : public SymbolizerTool { |
270 | public: |
271 | explicit Addr2LinePool(const char *addr2line_path, |
272 | LowLevelAllocator *allocator) |
273 | : addr2line_path_(addr2line_path), allocator_(allocator) { |
274 | addr2line_pool_.reserve(new_size: 16); |
275 | } |
276 | |
277 | bool SymbolizePC(uptr addr, SymbolizedStack *stack) override { |
278 | if (const char *buf = |
279 | SendCommand(module_name: stack->info.module, module_offset: stack->info.module_offset)) { |
280 | ParseSymbolizePCOutput(str: buf, res: stack); |
281 | return true; |
282 | } |
283 | return false; |
284 | } |
285 | |
286 | bool SymbolizeData(uptr addr, DataInfo *info) override { |
287 | return false; |
288 | } |
289 | |
290 | private: |
291 | const char *SendCommand(const char *module_name, uptr module_offset) { |
292 | Addr2LineProcess *addr2line = 0; |
293 | for (uptr i = 0; i < addr2line_pool_.size(); ++i) { |
294 | if (0 == |
295 | internal_strcmp(s1: module_name, s2: addr2line_pool_[i]->module_name())) { |
296 | addr2line = addr2line_pool_[i]; |
297 | break; |
298 | } |
299 | } |
300 | if (!addr2line) { |
301 | addr2line = |
302 | new(*allocator_) Addr2LineProcess(addr2line_path_, module_name); |
303 | addr2line_pool_.push_back(element: addr2line); |
304 | } |
305 | CHECK_EQ(0, internal_strcmp(module_name, addr2line->module_name())); |
306 | char buffer[kBufferSize]; |
307 | internal_snprintf(buffer, length: kBufferSize, format: "0x%zx\n0x%zx\n" , |
308 | module_offset, dummy_address_); |
309 | return addr2line->SendCommand(command: buffer); |
310 | } |
311 | |
312 | static const uptr kBufferSize = 64; |
313 | const char *addr2line_path_; |
314 | LowLevelAllocator *allocator_; |
315 | InternalMmapVector<Addr2LineProcess*> addr2line_pool_; |
316 | static const uptr dummy_address_ = |
317 | FIRST_32_SECOND_64(UINT32_MAX, UINT64_MAX); |
318 | }; |
319 | |
320 | # if SANITIZER_SUPPORTS_WEAK_HOOKS |
321 | extern "C" { |
322 | SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool |
323 | __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, |
324 | char *Buffer, int MaxLength); |
325 | SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool |
326 | __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, |
327 | char *Buffer, int MaxLength); |
328 | SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool |
329 | __sanitizer_symbolize_frame(const char *ModuleName, u64 ModuleOffset, |
330 | char *Buffer, int MaxLength); |
331 | SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void |
332 | __sanitizer_symbolize_flush(); |
333 | SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool |
334 | __sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength); |
335 | SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool |
336 | __sanitizer_symbolize_set_demangle(bool Demangle); |
337 | SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool |
338 | __sanitizer_symbolize_set_inline_frames(bool InlineFrames); |
339 | } // extern "C" |
340 | |
341 | class InternalSymbolizer final : public SymbolizerTool { |
342 | public: |
343 | static InternalSymbolizer *get(LowLevelAllocator *alloc) { |
344 | // These one is the most used one, so we will use it to detect a presence of |
345 | // internal symbolizer. |
346 | if (&__sanitizer_symbolize_code == nullptr) |
347 | return nullptr; |
348 | CHECK(__sanitizer_symbolize_set_demangle(common_flags()->demangle)); |
349 | CHECK(__sanitizer_symbolize_set_inline_frames( |
350 | common_flags()->symbolize_inline_frames)); |
351 | return new (*alloc) InternalSymbolizer(); |
352 | } |
353 | |
354 | bool SymbolizePC(uptr addr, SymbolizedStack *stack) override { |
355 | bool result = __sanitizer_symbolize_code( |
356 | ModuleName: stack->info.module, ModuleOffset: stack->info.module_offset, Buffer: buffer_, MaxLength: sizeof(buffer_)); |
357 | if (result) |
358 | ParseSymbolizePCOutput(str: buffer_, res: stack); |
359 | return result; |
360 | } |
361 | |
362 | bool SymbolizeData(uptr addr, DataInfo *info) override { |
363 | bool result = __sanitizer_symbolize_data(ModuleName: info->module, ModuleOffset: info->module_offset, |
364 | Buffer: buffer_, MaxLength: sizeof(buffer_)); |
365 | if (result) { |
366 | ParseSymbolizeDataOutput(str: buffer_, info); |
367 | info->start += (addr - info->module_offset); // Add the base address. |
368 | } |
369 | return result; |
370 | } |
371 | |
372 | bool SymbolizeFrame(uptr addr, FrameInfo *info) override { |
373 | bool result = __sanitizer_symbolize_frame(ModuleName: info->module, ModuleOffset: info->module_offset, |
374 | Buffer: buffer_, MaxLength: sizeof(buffer_)); |
375 | if (result) |
376 | ParseSymbolizeFrameOutput(str: buffer_, locals: &info->locals); |
377 | return result; |
378 | } |
379 | |
380 | void Flush() override { __sanitizer_symbolize_flush(); } |
381 | |
382 | const char *Demangle(const char *name) override { |
383 | if (__sanitizer_symbolize_demangle(Name: name, Buffer: buffer_, MaxLength: sizeof(buffer_))) { |
384 | char *res_buff = nullptr; |
385 | ExtractToken(str: buffer_, delims: "" , result: &res_buff); |
386 | return res_buff; |
387 | } |
388 | return nullptr; |
389 | } |
390 | |
391 | private: |
392 | InternalSymbolizer() {} |
393 | |
394 | char buffer_[16 * 1024]; |
395 | }; |
396 | # else // SANITIZER_SUPPORTS_WEAK_HOOKS |
397 | |
398 | class InternalSymbolizer final : public SymbolizerTool { |
399 | public: |
400 | static InternalSymbolizer *get(LowLevelAllocator *alloc) { return 0; } |
401 | }; |
402 | |
403 | # endif // SANITIZER_SUPPORTS_WEAK_HOOKS |
404 | |
405 | const char *Symbolizer::PlatformDemangle(const char *name) { |
406 | return DemangleSwiftAndCXX(name); |
407 | } |
408 | |
409 | static SymbolizerTool *ChooseExternalSymbolizer(LowLevelAllocator *allocator) { |
410 | const char *path = common_flags()->external_symbolizer_path; |
411 | |
412 | if (path && internal_strchr(s: path, c: '%')) { |
413 | char *new_path = (char *)InternalAlloc(size: kMaxPathLength); |
414 | SubstituteForFlagValue(s: path, out: new_path, out_size: kMaxPathLength); |
415 | path = new_path; |
416 | } |
417 | |
418 | const char *binary_name = path ? StripModuleName(module: path) : "" ; |
419 | static const char kLLVMSymbolizerPrefix[] = "llvm-symbolizer" ; |
420 | if (path && path[0] == '\0') { |
421 | VReport(2, "External symbolizer is explicitly disabled.\n" ); |
422 | return nullptr; |
423 | } else if (!internal_strncmp(s1: binary_name, s2: kLLVMSymbolizerPrefix, |
424 | n: internal_strlen(s: kLLVMSymbolizerPrefix))) { |
425 | VReport(2, "Using llvm-symbolizer at user-specified path: %s\n" , path); |
426 | return new(*allocator) LLVMSymbolizer(path, allocator); |
427 | } else if (!internal_strcmp(s1: binary_name, s2: "atos" )) { |
428 | #if SANITIZER_APPLE |
429 | VReport(2, "Using atos at user-specified path: %s\n" , path); |
430 | return new(*allocator) AtosSymbolizer(path, allocator); |
431 | #else // SANITIZER_APPLE |
432 | Report(format: "ERROR: Using `atos` is only supported on Darwin.\n" ); |
433 | Die(); |
434 | #endif // SANITIZER_APPLE |
435 | } else if (!internal_strcmp(s1: binary_name, s2: "addr2line" )) { |
436 | VReport(2, "Using addr2line at user-specified path: %s\n" , path); |
437 | return new(*allocator) Addr2LinePool(path, allocator); |
438 | } else if (path) { |
439 | Report(format: "ERROR: External symbolizer path is set to '%s' which isn't " |
440 | "a known symbolizer. Please set the path to the llvm-symbolizer " |
441 | "binary or other known tool.\n" , path); |
442 | Die(); |
443 | } |
444 | |
445 | // Otherwise symbolizer program is unknown, let's search $PATH |
446 | CHECK(path == nullptr); |
447 | #if SANITIZER_APPLE |
448 | if (const char *found_path = FindPathToBinary("atos" )) { |
449 | VReport(2, "Using atos found at: %s\n" , found_path); |
450 | return new(*allocator) AtosSymbolizer(found_path, allocator); |
451 | } |
452 | #endif // SANITIZER_APPLE |
453 | if (const char *found_path = FindPathToBinary(name: "llvm-symbolizer" )) { |
454 | VReport(2, "Using llvm-symbolizer found at: %s\n" , found_path); |
455 | return new(*allocator) LLVMSymbolizer(found_path, allocator); |
456 | } |
457 | if (common_flags()->allow_addr2line) { |
458 | if (const char *found_path = FindPathToBinary(name: "addr2line" )) { |
459 | VReport(2, "Using addr2line found at: %s\n" , found_path); |
460 | return new(*allocator) Addr2LinePool(found_path, allocator); |
461 | } |
462 | } |
463 | return nullptr; |
464 | } |
465 | |
466 | static void ChooseSymbolizerTools(IntrusiveList<SymbolizerTool> *list, |
467 | LowLevelAllocator *allocator) { |
468 | if (!common_flags()->symbolize) { |
469 | VReport(2, "Symbolizer is disabled.\n" ); |
470 | return; |
471 | } |
472 | if (common_flags()->enable_symbolizer_markup) { |
473 | VReport(2, "Using symbolizer markup" ); |
474 | SymbolizerTool *tool = new (*allocator) MarkupSymbolizerTool(); |
475 | CHECK(tool); |
476 | list->push_back(x: tool); |
477 | } |
478 | if (IsAllocatorOutOfMemory()) { |
479 | VReport(2, "Cannot use internal symbolizer: out of memory\n" ); |
480 | } else if (SymbolizerTool *tool = InternalSymbolizer::get(alloc: allocator)) { |
481 | VReport(2, "Using internal symbolizer.\n" ); |
482 | list->push_back(x: tool); |
483 | return; |
484 | } |
485 | if (SymbolizerTool *tool = LibbacktraceSymbolizer::get(alloc: allocator)) { |
486 | VReport(2, "Using libbacktrace symbolizer.\n" ); |
487 | list->push_back(x: tool); |
488 | return; |
489 | } |
490 | |
491 | if (SymbolizerTool *tool = ChooseExternalSymbolizer(allocator)) { |
492 | list->push_back(x: tool); |
493 | } |
494 | |
495 | #if SANITIZER_APPLE |
496 | VReport(2, "Using dladdr symbolizer.\n" ); |
497 | list->push_back(new(*allocator) DlAddrSymbolizer()); |
498 | #endif // SANITIZER_APPLE |
499 | } |
500 | |
501 | Symbolizer *Symbolizer::PlatformInit() { |
502 | IntrusiveList<SymbolizerTool> list; |
503 | list.clear(); |
504 | ChooseSymbolizerTools(list: &list, allocator: &symbolizer_allocator_); |
505 | return new(symbolizer_allocator_) Symbolizer(list); |
506 | } |
507 | |
508 | void Symbolizer::LateInitialize() { |
509 | Symbolizer::GetOrInit(); |
510 | InitializeSwiftDemangler(); |
511 | } |
512 | |
513 | } // namespace __sanitizer |
514 | |
515 | #endif // SANITIZER_POSIX |
516 | |