1//===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is shared between AddressSanitizer and ThreadSanitizer
10// run-time libraries.
11//===----------------------------------------------------------------------===//
12
13#include "sanitizer_allocator_internal.h"
14#include "sanitizer_internal_defs.h"
15#include "sanitizer_platform.h"
16#include "sanitizer_symbolizer_internal.h"
17
18namespace __sanitizer {
19
20Symbolizer *Symbolizer::GetOrInit() {
21 SpinMutexLock l(&init_mu_);
22 if (symbolizer_)
23 return symbolizer_;
24 symbolizer_ = PlatformInit();
25 CHECK(symbolizer_);
26 return symbolizer_;
27}
28
29// See sanitizer_symbolizer_markup.cpp.
30#if !SANITIZER_SYMBOLIZER_MARKUP
31
32const char *ExtractToken(const char *str, const char *delims, char **result) {
33 uptr prefix_len = internal_strcspn(s: str, reject: delims);
34 *result = (char *)InternalAlloc(size: prefix_len + 1);
35 internal_memcpy(dest: *result, src: str, n: prefix_len);
36 (*result)[prefix_len] = '\0';
37 const char *prefix_end = str + prefix_len;
38 if (*prefix_end != '\0')
39 prefix_end++;
40 return prefix_end;
41}
42
43const char *ExtractInt(const char *str, const char *delims, int *result) {
44 char *buff = nullptr;
45 const char *ret = ExtractToken(str, delims, result: &buff);
46 if (buff) {
47 *result = (int)internal_atoll(nptr: buff);
48 }
49 InternalFree(p: buff);
50 return ret;
51}
52
53const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
54 char *buff = nullptr;
55 const char *ret = ExtractToken(str, delims, result: &buff);
56 if (buff) {
57 *result = (uptr)internal_atoll(nptr: buff);
58 }
59 InternalFree(p: buff);
60 return ret;
61}
62
63const char *ExtractSptr(const char *str, const char *delims, sptr *result) {
64 char *buff = nullptr;
65 const char *ret = ExtractToken(str, delims, result: &buff);
66 if (buff) {
67 *result = (sptr)internal_atoll(nptr: buff);
68 }
69 InternalFree(p: buff);
70 return ret;
71}
72
73const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
74 char **result) {
75 const char *found_delimiter = internal_strstr(haystack: str, needle: delimiter);
76 uptr prefix_len =
77 found_delimiter ? found_delimiter - str : internal_strlen(s: str);
78 *result = (char *)InternalAlloc(size: prefix_len + 1);
79 internal_memcpy(dest: *result, src: str, n: prefix_len);
80 (*result)[prefix_len] = '\0';
81 const char *prefix_end = str + prefix_len;
82 if (*prefix_end != '\0')
83 prefix_end += internal_strlen(s: delimiter);
84 return prefix_end;
85}
86
87SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
88 Lock l(&mu_);
89 SymbolizedStack *res = SymbolizedStack::New(addr);
90 auto *mod = FindModuleForAddress(address: addr);
91 if (!mod)
92 return res;
93 // Always fill data about module name and offset.
94 res->info.FillModuleInfo(mod: *mod);
95 for (auto &tool : tools_) {
96 SymbolizerScope sym_scope(this);
97 if (tool.SymbolizePC(addr, stack: res)) {
98 return res;
99 }
100 }
101 return res;
102}
103
104bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
105 Lock l(&mu_);
106 const char *module_name = nullptr;
107 uptr module_offset;
108 ModuleArch arch;
109 if (!FindModuleNameAndOffsetForAddress(address: addr, module_name: &module_name, module_offset: &module_offset,
110 module_arch: &arch))
111 return false;
112 info->Clear();
113 info->module = internal_strdup(s: module_name);
114 info->module_offset = module_offset;
115 info->module_arch = arch;
116 for (auto &tool : tools_) {
117 SymbolizerScope sym_scope(this);
118 if (tool.SymbolizeData(addr, info)) {
119 return true;
120 }
121 }
122 return false;
123}
124
125bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
126 Lock l(&mu_);
127 const char *module_name = nullptr;
128 if (!FindModuleNameAndOffsetForAddress(
129 address: addr, module_name: &module_name, module_offset: &info->module_offset, module_arch: &info->module_arch))
130 return false;
131 info->module = internal_strdup(s: module_name);
132 for (auto &tool : tools_) {
133 SymbolizerScope sym_scope(this);
134 if (tool.SymbolizeFrame(addr, info)) {
135 return true;
136 }
137 }
138 return false;
139}
140
141bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
142 uptr *module_address) {
143 Lock l(&mu_);
144 const char *internal_module_name = nullptr;
145 ModuleArch arch;
146 if (!FindModuleNameAndOffsetForAddress(address: pc, module_name: &internal_module_name,
147 module_offset: module_address, module_arch: &arch))
148 return false;
149
150 if (module_name)
151 *module_name = module_names_.GetOwnedCopy(str: internal_module_name);
152 return true;
153}
154
155void Symbolizer::Flush() {
156 Lock l(&mu_);
157 for (auto &tool : tools_) {
158 SymbolizerScope sym_scope(this);
159 tool.Flush();
160 }
161}
162
163const char *Symbolizer::Demangle(const char *name) {
164 CHECK(name);
165 Lock l(&mu_);
166 for (auto &tool : tools_) {
167 SymbolizerScope sym_scope(this);
168 if (const char *demangled = tool.Demangle(name))
169 return demangled;
170 }
171 if (const char *demangled = PlatformDemangle(name))
172 return demangled;
173 return name;
174}
175
176bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
177 const char **module_name,
178 uptr *module_offset,
179 ModuleArch *module_arch) {
180 const LoadedModule *module = FindModuleForAddress(address);
181 if (!module)
182 return false;
183 *module_name = module->full_name();
184 *module_offset = address - module->base_address();
185 *module_arch = module->arch();
186 return true;
187}
188
189void Symbolizer::RefreshModules() {
190 modules_.init();
191 fallback_modules_.fallbackInit();
192 RAW_CHECK(modules_.size() > 0);
193 modules_fresh_ = true;
194}
195
196const ListOfModules &Symbolizer::GetRefreshedListOfModules() {
197 if (!modules_fresh_)
198 RefreshModules();
199
200 return modules_;
201}
202
203static const LoadedModule *SearchForModule(const ListOfModules &modules,
204 uptr address) {
205 for (uptr i = 0; i < modules.size(); i++) {
206 if (modules[i].containsAddress(address)) {
207 return &modules[i];
208 }
209 }
210 return nullptr;
211}
212
213const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
214 bool modules_were_reloaded = false;
215 if (!modules_fresh_) {
216 RefreshModules();
217 modules_were_reloaded = true;
218 }
219 const LoadedModule *module = SearchForModule(modules: modules_, address);
220 if (module)
221 return module;
222
223 // dlopen/dlclose interceptors invalidate the module list, but when
224 // interception is disabled, we need to retry if the lookup fails in
225 // case the module list changed.
226# if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
227 if (!modules_were_reloaded) {
228 RefreshModules();
229 module = SearchForModule(modules: modules_, address);
230 if (module)
231 return module;
232 }
233# endif
234
235 if (fallback_modules_.size()) {
236 module = SearchForModule(modules: fallback_modules_, address);
237 }
238 return module;
239}
240
241// For now we assume the following protocol:
242// For each request of the form
243// <module_name> <module_offset>
244// passed to STDIN, external symbolizer prints to STDOUT response:
245// <function_name>
246// <file_name>:<line_number>:<column_number>
247// <function_name>
248// <file_name>:<line_number>:<column_number>
249// ...
250// <empty line>
251class LLVMSymbolizerProcess final : public SymbolizerProcess {
252 public:
253 explicit LLVMSymbolizerProcess(const char *path)
254 : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {}
255
256 private:
257 bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
258 // Empty line marks the end of llvm-symbolizer output.
259 return length >= 2 && buffer[length - 1] == '\n' &&
260 buffer[length - 2] == '\n';
261 }
262
263 // When adding a new architecture, don't forget to also update
264 // script/asan_symbolize.py and sanitizer_common.h.
265 void GetArgV(const char *path_to_binary,
266 const char *(&argv)[kArgVMax]) const override {
267# if defined(__x86_64h__)
268 const char *const kSymbolizerArch = "--default-arch=x86_64h";
269# elif defined(__x86_64__)
270 const char *const kSymbolizerArch = "--default-arch=x86_64";
271# elif defined(__i386__)
272 const char *const kSymbolizerArch = "--default-arch=i386";
273# elif SANITIZER_LOONGARCH64
274 const char *const kSymbolizerArch = "--default-arch=loongarch64";
275# elif SANITIZER_RISCV64
276 const char *const kSymbolizerArch = "--default-arch=riscv64";
277# elif defined(__aarch64__)
278 const char *const kSymbolizerArch = "--default-arch=arm64";
279# elif defined(__arm__)
280 const char *const kSymbolizerArch = "--default-arch=arm";
281# elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
282 const char *const kSymbolizerArch = "--default-arch=powerpc64";
283# elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
284 const char *const kSymbolizerArch = "--default-arch=powerpc64le";
285# elif defined(__s390x__)
286 const char *const kSymbolizerArch = "--default-arch=s390x";
287# elif defined(__s390__)
288 const char *const kSymbolizerArch = "--default-arch=s390";
289# else
290 const char *const kSymbolizerArch = "--default-arch=unknown";
291# endif
292
293 const char *const demangle_flag =
294 common_flags()->demangle ? "--demangle" : "--no-demangle";
295 const char *const inline_flag =
296 common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines";
297 int i = 0;
298 argv[i++] = path_to_binary;
299 argv[i++] = demangle_flag;
300 argv[i++] = inline_flag;
301 argv[i++] = kSymbolizerArch;
302 argv[i++] = nullptr;
303 CHECK_LE(i, kArgVMax);
304 }
305};
306
307LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
308 : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
309
310// Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
311// Windows, so extract tokens from the right hand side first. The column info is
312// also optional.
313static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
314 char *file_line_info = nullptr;
315 str = ExtractToken(str, delims: "\n", result: &file_line_info);
316 CHECK(file_line_info);
317
318 if (uptr size = internal_strlen(s: file_line_info)) {
319 char *back = file_line_info + size - 1;
320 for (int i = 0; i < 2; ++i) {
321 while (back > file_line_info && IsDigit(c: *back)) --back;
322 if (*back != ':' || !IsDigit(c: back[1]))
323 break;
324 info->column = info->line;
325 info->line = internal_atoll(nptr: back + 1);
326 // Truncate the string at the colon to keep only filename.
327 *back = '\0';
328 --back;
329 }
330 ExtractToken(str: file_line_info, delims: "", result: &info->file);
331 }
332
333 InternalFree(p: file_line_info);
334 return str;
335}
336
337// Parses one or more two-line strings in the following format:
338// <function_name>
339// <file_name>:<line_number>[:<column_number>]
340// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
341// them use the same output format.
342void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
343 bool top_frame = true;
344 SymbolizedStack *last = res;
345 while (true) {
346 char *function_name = nullptr;
347 str = ExtractToken(str, delims: "\n", result: &function_name);
348 CHECK(function_name);
349 if (function_name[0] == '\0') {
350 // There are no more frames.
351 InternalFree(p: function_name);
352 break;
353 }
354 SymbolizedStack *cur;
355 if (top_frame) {
356 cur = res;
357 top_frame = false;
358 } else {
359 cur = SymbolizedStack::New(addr: res->info.address);
360 cur->info.FillModuleInfo(mod_name: res->info.module, mod_offset: res->info.module_offset,
361 arch: res->info.module_arch);
362 last->next = cur;
363 last = cur;
364 }
365
366 AddressInfo *info = &cur->info;
367 info->function = function_name;
368 str = ParseFileLineInfo(info, str);
369
370 // Functions and filenames can be "??", in which case we write 0
371 // to address info to mark that names are unknown.
372 if (0 == internal_strcmp(s1: info->function, s2: "??")) {
373 InternalFree(p: info->function);
374 info->function = 0;
375 }
376 if (info->file && 0 == internal_strcmp(s1: info->file, s2: "??")) {
377 InternalFree(p: info->file);
378 info->file = 0;
379 }
380 }
381}
382
383// Parses a two- or three-line string in the following format:
384// <symbol_name>
385// <start_address> <size>
386// <filename>:<column>
387// Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
388// for symbolizing the third line in D123538, but we support the older two-line
389// information as well.
390void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
391 str = ExtractToken(str, delims: "\n", result: &info->name);
392 str = ExtractUptr(str, delims: " ", result: &info->start);
393 str = ExtractUptr(str, delims: "\n", result: &info->size);
394 // Note: If the third line isn't present, these calls will set info.{file,
395 // line} to empty strings.
396 str = ExtractToken(str, delims: ":", result: &info->file);
397 str = ExtractUptr(str, delims: "\n", result: &info->line);
398}
399
400void ParseSymbolizeFrameOutput(const char *str,
401 InternalMmapVector<LocalInfo> *locals) {
402 if (internal_strncmp(s1: str, s2: "??", n: 2) == 0)
403 return;
404
405 while (*str) {
406 LocalInfo local;
407 str = ExtractToken(str, delims: "\n", result: &local.function_name);
408 str = ExtractToken(str, delims: "\n", result: &local.name);
409
410 AddressInfo addr;
411 str = ParseFileLineInfo(info: &addr, str);
412 local.decl_file = addr.file;
413 local.decl_line = addr.line;
414
415 local.has_frame_offset = internal_strncmp(s1: str, s2: "??", n: 2) != 0;
416 str = ExtractSptr(str, delims: " ", result: &local.frame_offset);
417
418 local.has_size = internal_strncmp(s1: str, s2: "??", n: 2) != 0;
419 str = ExtractUptr(str, delims: " ", result: &local.size);
420
421 local.has_tag_offset = internal_strncmp(s1: str, s2: "??", n: 2) != 0;
422 str = ExtractUptr(str, delims: "\n", result: &local.tag_offset);
423
424 locals->push_back(element: local);
425 }
426}
427
428bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
429 AddressInfo *info = &stack->info;
430 const char *buf = FormatAndSendCommand(
431 command_prefix: "CODE", module_name: info->module, module_offset: info->module_offset, arch: info->module_arch);
432 if (!buf)
433 return false;
434 ParseSymbolizePCOutput(str: buf, res: stack);
435 return true;
436}
437
438bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
439 const char *buf = FormatAndSendCommand(
440 command_prefix: "DATA", module_name: info->module, module_offset: info->module_offset, arch: info->module_arch);
441 if (!buf)
442 return false;
443 ParseSymbolizeDataOutput(str: buf, info);
444 info->start += (addr - info->module_offset); // Add the base address.
445 return true;
446}
447
448bool LLVMSymbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
449 const char *buf = FormatAndSendCommand(
450 command_prefix: "FRAME", module_name: info->module, module_offset: info->module_offset, arch: info->module_arch);
451 if (!buf)
452 return false;
453 ParseSymbolizeFrameOutput(str: buf, locals: &info->locals);
454 return true;
455}
456
457const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix,
458 const char *module_name,
459 uptr module_offset,
460 ModuleArch arch) {
461 CHECK(module_name);
462 int size_needed = 0;
463 if (arch == kModuleArchUnknown)
464 size_needed = internal_snprintf(buffer: buffer_, length: kBufferSize, format: "%s \"%s\" 0x%zx\n",
465 command_prefix, module_name, module_offset);
466 else
467 size_needed = internal_snprintf(
468 buffer: buffer_, length: kBufferSize, format: "%s \"%s:%s\" 0x%zx\n", command_prefix,
469 module_name, ModuleArchToString(arch), module_offset);
470
471 if (size_needed >= static_cast<int>(kBufferSize)) {
472 Report(format: "WARNING: Command buffer too small");
473 return nullptr;
474 }
475
476 return symbolizer_process_->SendCommand(command: buffer_);
477}
478
479SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
480 : path_(path),
481 input_fd_(kInvalidFd),
482 output_fd_(kInvalidFd),
483 times_restarted_(0),
484 failed_to_start_(false),
485 reported_invalid_path_(false),
486 use_posix_spawn_(use_posix_spawn) {
487 CHECK(path_);
488 CHECK_NE(path_[0], '\0');
489}
490
491static bool IsSameModule(const char *path) {
492 if (const char *ProcessName = GetProcessName()) {
493 if (const char *SymbolizerName = StripModuleName(module: path)) {
494 return !internal_strcmp(s1: ProcessName, s2: SymbolizerName);
495 }
496 }
497 return false;
498}
499
500const char *SymbolizerProcess::SendCommand(const char *command) {
501 if (failed_to_start_)
502 return nullptr;
503 if (IsSameModule(path: path_)) {
504 Report(format: "WARNING: Symbolizer was blocked from starting itself!\n");
505 failed_to_start_ = true;
506 return nullptr;
507 }
508 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
509 // Start or restart symbolizer if we failed to send command to it.
510 if (const char *res = SendCommandImpl(command))
511 return res;
512 Restart();
513 }
514 if (!failed_to_start_) {
515 Report(format: "WARNING: Failed to use and restart external symbolizer!\n");
516 failed_to_start_ = true;
517 }
518 return nullptr;
519}
520
521const char *SymbolizerProcess::SendCommandImpl(const char *command) {
522 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
523 return nullptr;
524 if (!WriteToSymbolizer(buffer: command, length: internal_strlen(s: command)))
525 return nullptr;
526 if (!ReadFromSymbolizer())
527 return nullptr;
528 return buffer_.data();
529}
530
531bool SymbolizerProcess::Restart() {
532 if (input_fd_ != kInvalidFd)
533 CloseFile(input_fd_);
534 if (output_fd_ != kInvalidFd)
535 CloseFile(output_fd_);
536 return StartSymbolizerSubprocess();
537}
538
539bool SymbolizerProcess::ReadFromSymbolizer() {
540 buffer_.clear();
541 constexpr uptr max_length = 1024;
542 bool ret = true;
543 do {
544 uptr just_read = 0;
545 uptr size_before = buffer_.size();
546 buffer_.resize(new_size: size_before + max_length);
547 buffer_.resize(new_size: buffer_.capacity());
548 bool ret = ReadFromFile(fd: input_fd_, buff: &buffer_[size_before],
549 buff_size: buffer_.size() - size_before, bytes_read: &just_read);
550
551 if (!ret)
552 just_read = 0;
553
554 buffer_.resize(new_size: size_before + just_read);
555
556 // We can't read 0 bytes, as we don't expect external symbolizer to close
557 // its stdout.
558 if (just_read == 0) {
559 Report(format: "WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
560 ret = false;
561 break;
562 }
563 } while (!ReachedEndOfOutput(buffer: buffer_.data(), length: buffer_.size()));
564 buffer_.push_back(element: '\0');
565 return ret;
566}
567
568bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
569 if (length == 0)
570 return true;
571 uptr write_len = 0;
572 bool success = WriteToFile(fd: output_fd_, buff: buffer, buff_size: length, bytes_written: &write_len);
573 if (!success || write_len != length) {
574 Report(format: "WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
575 return false;
576 }
577 return true;
578}
579
580#endif // !SANITIZER_SYMBOLIZER_MARKUP
581
582} // namespace __sanitizer
583

// Provided by KDAB
//
// Privacy Policy
// Learn to use CMake with our Intro Training
// Find out more
//
// source code of compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp