1//===-- BreakpadRecords.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10#include "lldb/lldb-defines.h"
11#include "llvm/ADT/StringExtras.h"
12#include "llvm/ADT/StringSwitch.h"
13#include "llvm/Support/Endian.h"
14#include "llvm/Support/FormatVariadic.h"
15#include <optional>
16
17using namespace lldb_private;
18using namespace lldb_private::breakpad;
19
namespace {
/// Keywords recognised at the start of (or inside) a breakpad record.
/// Unknown is used for any text that is not one of the keywords below.
enum class Token {
  Unknown,      // not a keyword
  Module,       // "MODULE"
  Info,         // "INFO"
  CodeID,       // "CODE_ID"
  File,         // "FILE"
  Func,         // "FUNC"
  Inline,       // "INLINE"
  InlineOrigin, // "INLINE_ORIGIN"
  Public,       // "PUBLIC"
  Stack,        // "STACK"
  CFI,          // "CFI"
  Init,         // "INIT"
  Win,          // "WIN"
};
} // namespace
37
38template<typename T>
39static T stringTo(llvm::StringRef Str);
40
41template <> Token stringTo<Token>(llvm::StringRef Str) {
42 return llvm::StringSwitch<Token>(Str)
43 .Case(S: "MODULE", Value: Token::Module)
44 .Case(S: "INFO", Value: Token::Info)
45 .Case(S: "CODE_ID", Value: Token::CodeID)
46 .Case(S: "FILE", Value: Token::File)
47 .Case(S: "FUNC", Value: Token::Func)
48 .Case(S: "INLINE", Value: Token::Inline)
49 .Case(S: "INLINE_ORIGIN", Value: Token::InlineOrigin)
50 .Case(S: "PUBLIC", Value: Token::Public)
51 .Case(S: "STACK", Value: Token::Stack)
52 .Case(S: "CFI", Value: Token::CFI)
53 .Case(S: "INIT", Value: Token::Init)
54 .Case(S: "WIN", Value: Token::Win)
55 .Default(Value: Token::Unknown);
56}
57
58template <>
59llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
60 using llvm::Triple;
61 return llvm::StringSwitch<Triple::OSType>(Str)
62 .Case(S: "Linux", Value: Triple::Linux)
63 .Case(S: "mac", Value: Triple::MacOSX)
64 .Case(S: "windows", Value: Triple::Win32)
65 .Default(Value: Triple::UnknownOS);
66}
67
68template <>
69llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
70 using llvm::Triple;
71 return llvm::StringSwitch<Triple::ArchType>(Str)
72 .Case(S: "arm", Value: Triple::arm)
73 .Cases(S0: "arm64", S1: "arm64e", Value: Triple::aarch64)
74 .Case(S: "mips", Value: Triple::mips)
75 .Case(S: "msp430", Value: Triple::msp430)
76 .Case(S: "ppc", Value: Triple::ppc)
77 .Case(S: "ppc64", Value: Triple::ppc64)
78 .Case(S: "s390", Value: Triple::systemz)
79 .Case(S: "sparc", Value: Triple::sparc)
80 .Case(S: "sparcv9", Value: Triple::sparcv9)
81 .Case(S: "x86", Value: Triple::x86)
82 .Cases(S0: "x86_64", S1: "x86_64h", Value: Triple::x86_64)
83 .Default(Value: Triple::UnknownArch);
84}
85
86template<typename T>
87static T consume(llvm::StringRef &Str) {
88 llvm::StringRef Token;
89 std::tie(args&: Token, args&: Str) = getToken(Source: Str);
90 return stringTo<T>(Token);
91}
92
/// Number of hexadecimal digits required to spell out every byte of a (POD)
/// object of type \p T: two digits per byte.
template <typename T> static constexpr size_t hex_digits() {
  return sizeof(T) * 2;
}
98
99static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
100 struct data_t {
101 using uuid_t = uint8_t[16];
102 uuid_t uuid;
103 llvm::support::ubig32_t age;
104 } data;
105 static_assert(sizeof(data) == 20);
106 // The textual module id encoding should be between 33 and 40 bytes long,
107 // depending on the size of the age field, which is of variable length.
108 // The first three chunks of the id are encoded in big endian, so we need to
109 // byte-swap those.
110 if (str.size() <= hex_digits<data_t::uuid_t>() ||
111 str.size() > hex_digits<data_t>())
112 return UUID();
113 if (!all_of(Range&: str, P: llvm::isHexDigit))
114 return UUID();
115
116 llvm::StringRef uuid_str = str.take_front(N: hex_digits<data_t::uuid_t>());
117 llvm::StringRef age_str = str.drop_front(N: hex_digits<data_t::uuid_t>());
118
119 llvm::copy(Range: fromHex(Input: uuid_str), Out: data.uuid);
120 uint32_t age;
121 bool success = to_integer(S: age_str, Num&: age, Base: 16);
122 assert(success);
123 UNUSED_IF_ASSERT_DISABLED(success);
124 data.age = age;
125
126 // On non-windows, the age field should always be zero, so we don't include to
127 // match the native uuid format of these platforms.
128 return UUID(&data, os == llvm::Triple::Win32 ? sizeof(data)
129 : sizeof(data.uuid));
130}
131
132std::optional<Record::Kind> Record::classify(llvm::StringRef Line) {
133 Token Tok = consume<Token>(Str&: Line);
134 switch (Tok) {
135 case Token::Module:
136 return Record::Module;
137 case Token::Info:
138 return Record::Info;
139 case Token::File:
140 return Record::File;
141 case Token::Func:
142 return Record::Func;
143 case Token::Public:
144 return Record::Public;
145 case Token::Stack:
146 Tok = consume<Token>(Str&: Line);
147 switch (Tok) {
148 case Token::CFI:
149 return Record::StackCFI;
150 case Token::Win:
151 return Record::StackWin;
152 default:
153 return std::nullopt;
154 }
155 case Token::Inline:
156 return Record::Inline;
157 case Token::InlineOrigin:
158 return Record::InlineOrigin;
159 case Token::Unknown:
160 // Optimistically assume that any unrecognised token means this is a line
161 // record, those don't have a special keyword and start directly with a
162 // hex number.
163 return Record::Line;
164
165 case Token::CodeID:
166 case Token::CFI:
167 case Token::Init:
168 case Token::Win:
169 // These should never appear at the start of a valid record.
170 return std::nullopt;
171 }
172 llvm_unreachable("Fully covered switch above!");
173}
174
175std::optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
176 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
177 if (consume<Token>(Str&: Line) != Token::Module)
178 return std::nullopt;
179
180 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Str&: Line);
181 if (OS == llvm::Triple::UnknownOS)
182 return std::nullopt;
183
184 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Str&: Line);
185 if (Arch == llvm::Triple::UnknownArch)
186 return std::nullopt;
187
188 llvm::StringRef Str;
189 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
190 UUID ID = parseModuleId(os: OS, str: Str);
191 if (!ID)
192 return std::nullopt;
193
194 return ModuleRecord(OS, Arch, std::move(ID));
195}
196
197llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
198 const ModuleRecord &R) {
199 return OS << "MODULE " << llvm::Triple::getOSTypeName(Kind: R.OS) << " "
200 << llvm::Triple::getArchTypeName(Kind: R.Arch) << " "
201 << R.ID.GetAsString();
202}
203
204std::optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
205 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
206 if (consume<Token>(Str&: Line) != Token::Info)
207 return std::nullopt;
208
209 if (consume<Token>(Str&: Line) != Token::CodeID)
210 return std::nullopt;
211
212 llvm::StringRef Str;
213 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
214 // If we don't have any text following the code ID (e.g. on linux), we should
215 // use this as the UUID. Otherwise, we should revert back to the module ID.
216 UUID ID;
217 if (Line.trim().empty()) {
218 if (Str.empty() || !ID.SetFromStringRef(Str))
219 return std::nullopt;
220 }
221 return InfoRecord(std::move(ID));
222}
223
224llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
225 const InfoRecord &R) {
226 return OS << "INFO CODE_ID " << R.ID.GetAsString();
227}
228
229template <typename T>
230static std::optional<T> parseNumberName(llvm::StringRef Line, Token TokenType) {
231 // TOKEN number name
232 if (consume<Token>(Str&: Line) != TokenType)
233 return std::nullopt;
234
235 llvm::StringRef Str;
236 size_t Number;
237 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
238 if (!to_integer(S: Str, Num&: Number))
239 return std::nullopt;
240
241 llvm::StringRef Name = Line.trim();
242 if (Name.empty())
243 return std::nullopt;
244
245 return T(Number, Name);
246}
247
248std::optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
249 // FILE number name
250 return parseNumberName<FileRecord>(Line, TokenType: Token::File);
251}
252
253llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
254 const FileRecord &R) {
255 return OS << "FILE " << R.Number << " " << R.Name;
256}
257
258std::optional<InlineOriginRecord>
259InlineOriginRecord::parse(llvm::StringRef Line) {
260 // INLINE_ORIGIN number name
261 return parseNumberName<InlineOriginRecord>(Line, TokenType: Token::InlineOrigin);
262}
263
264llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
265 const InlineOriginRecord &R) {
266 return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name;
267}
268
269static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
270 lldb::addr_t &Address, lldb::addr_t *Size,
271 lldb::addr_t &ParamSize, llvm::StringRef &Name) {
272 // PUBLIC [m] address param_size name
273 // or
274 // FUNC [m] address size param_size name
275
276 Token Tok = Size ? Token::Func : Token::Public;
277
278 if (consume<Token>(Str&: Line) != Tok)
279 return false;
280
281 llvm::StringRef Str;
282 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
283 Multiple = Str == "m";
284
285 if (Multiple)
286 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
287 if (!to_integer(S: Str, Num&: Address, Base: 16))
288 return false;
289
290 if (Tok == Token::Func) {
291 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
292 if (!to_integer(S: Str, Num&: *Size, Base: 16))
293 return false;
294 }
295
296 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
297 if (!to_integer(S: Str, Num&: ParamSize, Base: 16))
298 return false;
299
300 Name = Line.trim();
301 if (Name.empty())
302 return false;
303
304 return true;
305}
306
307std::optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
308 bool Multiple;
309 lldb::addr_t Address, Size, ParamSize;
310 llvm::StringRef Name;
311
312 if (parsePublicOrFunc(Line, Multiple, Address, Size: &Size, ParamSize, Name))
313 return FuncRecord(Multiple, Address, Size, ParamSize, Name);
314
315 return std::nullopt;
316}
317
318bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
319 return L.Multiple == R.Multiple && L.Address == R.Address &&
320 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
321}
322llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
323 const FuncRecord &R) {
324 return OS << llvm::formatv(Fmt: "FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
325 Vals: R.Multiple ? "m " : "", Vals: R.Address, Vals: R.Size,
326 Vals: R.ParamSize, Vals: R.Name);
327}
328
329std::optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) {
330 // INLINE inline_nest_level call_site_line call_site_file_num origin_num
331 // [address size]+
332 if (consume<Token>(Str&: Line) != Token::Inline)
333 return std::nullopt;
334
335 llvm::SmallVector<llvm::StringRef> Tokens;
336 SplitString(Source: Line, OutFragments&: Tokens, Delimiters: " ");
337 if (Tokens.size() < 6 || Tokens.size() % 2 == 1)
338 return std::nullopt;
339
340 size_t InlineNestLevel;
341 uint32_t CallSiteLineNum;
342 size_t CallSiteFileNum;
343 size_t OriginNum;
344 if (!(to_integer(S: Tokens[0], Num&: InlineNestLevel) &&
345 to_integer(S: Tokens[1], Num&: CallSiteLineNum) &&
346 to_integer(S: Tokens[2], Num&: CallSiteFileNum) &&
347 to_integer(S: Tokens[3], Num&: OriginNum)))
348 return std::nullopt;
349
350 InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum,
351 CallSiteFileNum, OriginNum);
352 for (size_t i = 4; i < Tokens.size(); i += 2) {
353 lldb::addr_t Address;
354 if (!to_integer(S: Tokens[i], Num&: Address, Base: 16))
355 return std::nullopt;
356 lldb::addr_t Size;
357 if (!to_integer(S: Tokens[i + 1].trim(), Num&: Size, Base: 16))
358 return std::nullopt;
359 Record.Ranges.emplace_back(args&: Address, args&: Size);
360 }
361 return Record;
362}
363
364bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) {
365 return L.InlineNestLevel == R.InlineNestLevel &&
366 L.CallSiteLineNum == R.CallSiteLineNum &&
367 L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum &&
368 L.Ranges == R.Ranges;
369}
370
371llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
372 const InlineRecord &R) {
373 OS << llvm::formatv(Fmt: "INLINE {0} {1} {2} {3}", Vals: R.InlineNestLevel,
374 Vals: R.CallSiteLineNum, Vals: R.CallSiteFileNum, Vals: R.OriginNum);
375 for (const auto &range : R.Ranges) {
376 OS << llvm::formatv(Fmt: " {0:x-} {1:x-}", Vals: range.first, Vals: range.second);
377 }
378 return OS;
379}
380
381std::optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
382 lldb::addr_t Address;
383 llvm::StringRef Str;
384 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
385 if (!to_integer(S: Str, Num&: Address, Base: 16))
386 return std::nullopt;
387
388 lldb::addr_t Size;
389 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
390 if (!to_integer(S: Str, Num&: Size, Base: 16))
391 return std::nullopt;
392
393 uint32_t LineNum;
394 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
395 if (!to_integer(S: Str, Num&: LineNum))
396 return std::nullopt;
397
398 size_t FileNum;
399 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
400 if (!to_integer(S: Str, Num&: FileNum))
401 return std::nullopt;
402
403 return LineRecord(Address, Size, LineNum, FileNum);
404}
405
406bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
407 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
408 L.FileNum == R.FileNum;
409}
410llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
411 const LineRecord &R) {
412 return OS << llvm::formatv(Fmt: "{0:x-} {1:x-} {2} {3}", Vals: R.Address, Vals: R.Size,
413 Vals: R.LineNum, Vals: R.FileNum);
414}
415
416std::optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
417 bool Multiple;
418 lldb::addr_t Address, ParamSize;
419 llvm::StringRef Name;
420
421 if (parsePublicOrFunc(Line, Multiple, Address, Size: nullptr, ParamSize, Name))
422 return PublicRecord(Multiple, Address, ParamSize, Name);
423
424 return std::nullopt;
425}
426
427bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
428 return L.Multiple == R.Multiple && L.Address == R.Address &&
429 L.ParamSize == R.ParamSize && L.Name == R.Name;
430}
431llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
432 const PublicRecord &R) {
433 return OS << llvm::formatv(Fmt: "PUBLIC {0}{1:x-} {2:x-} {3}",
434 Vals: R.Multiple ? "m " : "", Vals: R.Address, Vals: R.ParamSize,
435 Vals: R.Name);
436}
437
438std::optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
439 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
440 // or
441 // STACK CFI address reg1: expr1 reg2: expr2 ...
442 // No token in exprN ends with a colon.
443
444 if (consume<Token>(Str&: Line) != Token::Stack)
445 return std::nullopt;
446 if (consume<Token>(Str&: Line) != Token::CFI)
447 return std::nullopt;
448
449 llvm::StringRef Str;
450 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
451
452 bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
453 if (IsInitRecord)
454 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
455
456 lldb::addr_t Address;
457 if (!to_integer(S: Str, Num&: Address, Base: 16))
458 return std::nullopt;
459
460 std::optional<lldb::addr_t> Size;
461 if (IsInitRecord) {
462 Size.emplace();
463 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
464 if (!to_integer(S: Str, Num&: *Size, Base: 16))
465 return std::nullopt;
466 }
467
468 return StackCFIRecord(Address, Size, Line.trim());
469}
470
471bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
472 return L.Address == R.Address && L.Size == R.Size &&
473 L.UnwindRules == R.UnwindRules;
474}
475
476llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
477 const StackCFIRecord &R) {
478 OS << "STACK CFI ";
479 if (R.Size)
480 OS << "INIT ";
481 OS << llvm::formatv(Fmt: "{0:x-} ", Vals: R.Address);
482 if (R.Size)
483 OS << llvm::formatv(Fmt: "{0:x-} ", Vals: *R.Size);
484 return OS << " " << R.UnwindRules;
485}
486
487std::optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
488 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
489 // saved_register_size local_size max_stack_size has_program_string
490 // program_string_OR_allocates_base_pointer
491
492 if (consume<Token>(Str&: Line) != Token::Stack)
493 return std::nullopt;
494 if (consume<Token>(Str&: Line) != Token::Win)
495 return std::nullopt;
496
497 llvm::StringRef Str;
498 uint8_t Type;
499 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
500 // Right now we only support the "FrameData" frame type.
501 if (!to_integer(S: Str, Num&: Type) || FrameType(Type) != FrameType::FrameData)
502 return std::nullopt;
503
504 lldb::addr_t RVA;
505 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
506 if (!to_integer(S: Str, Num&: RVA, Base: 16))
507 return std::nullopt;
508
509 lldb::addr_t CodeSize;
510 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
511 if (!to_integer(S: Str, Num&: CodeSize, Base: 16))
512 return std::nullopt;
513
514 // Skip fields which we aren't using right now.
515 std::tie(args&: Str, args&: Line) = getToken(Source: Line); // prologue_size
516 std::tie(args&: Str, args&: Line) = getToken(Source: Line); // epilogue_size
517
518 lldb::addr_t ParameterSize;
519 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
520 if (!to_integer(S: Str, Num&: ParameterSize, Base: 16))
521 return std::nullopt;
522
523 lldb::addr_t SavedRegisterSize;
524 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
525 if (!to_integer(S: Str, Num&: SavedRegisterSize, Base: 16))
526 return std::nullopt;
527
528 lldb::addr_t LocalSize;
529 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
530 if (!to_integer(S: Str, Num&: LocalSize, Base: 16))
531 return std::nullopt;
532
533 std::tie(args&: Str, args&: Line) = getToken(Source: Line); // max_stack_size
534
535 uint8_t HasProgramString;
536 std::tie(args&: Str, args&: Line) = getToken(Source: Line);
537 if (!to_integer(S: Str, Num&: HasProgramString))
538 return std::nullopt;
539 // FrameData records should always have a program string.
540 if (!HasProgramString)
541 return std::nullopt;
542
543 return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize,
544 LocalSize, Line.trim());
545}
546
547bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) {
548 return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
549 L.ParameterSize == R.ParameterSize &&
550 L.SavedRegisterSize == R.SavedRegisterSize &&
551 L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString;
552}
553
554llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
555 const StackWinRecord &R) {
556 return OS << llvm::formatv(
557 Fmt: "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", Vals: R.RVA,
558 Vals: R.CodeSize, Vals: R.ParameterSize, Vals: R.SavedRegisterSize, Vals: R.LocalSize,
559 Vals: R.ProgramString);
560}
561
562llvm::StringRef breakpad::toString(Record::Kind K) {
563 switch (K) {
564 case Record::Module:
565 return "MODULE";
566 case Record::Info:
567 return "INFO";
568 case Record::File:
569 return "FILE";
570 case Record::Func:
571 return "FUNC";
572 case Record::Inline:
573 return "INLINE";
574 case Record::InlineOrigin:
575 return "INLINE_ORIGIN";
576 case Record::Line:
577 return "LINE";
578 case Record::Public:
579 return "PUBLIC";
580 case Record::StackCFI:
581 return "STACK CFI";
582 case Record::StackWin:
583 return "STACK WIN";
584 }
585 llvm_unreachable("Unknown record kind!");
586}
587

// Source: lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp