1 | //===-- DILLexer.cpp ------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
// This implements the lexer for the Data Inspection Language (DIL), and its
// helper functions, which will eventually underlie the 'frame variable'
// command. The language whose tokens this lexer recognizes is described in
// lldb/docs/dil-expr-lang.ebnf
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "lldb/ValueObject/DILLexer.h" |
15 | #include "lldb/Utility/Status.h" |
16 | #include "lldb/ValueObject/DILParser.h" |
17 | #include "llvm/ADT/StringSwitch.h" |
18 | |
19 | namespace lldb_private::dil { |
20 | |
21 | llvm::StringRef Token::GetTokenName(Kind kind) { |
22 | switch (kind) { |
23 | case Kind::amp: |
24 | return "amp" ; |
25 | case Kind::arrow: |
26 | return "arrow" ; |
27 | case Kind::coloncolon: |
28 | return "coloncolon" ; |
29 | case Kind::eof: |
30 | return "eof" ; |
31 | case Kind::identifier: |
32 | return "identifier" ; |
33 | case Kind::l_paren: |
34 | return "l_paren" ; |
35 | case Kind::l_square: |
36 | return "l_square" ; |
37 | case Kind::numeric_constant: |
38 | return "numeric_constant" ; |
39 | case Kind::period: |
40 | return "period" ; |
41 | case Kind::r_paren: |
42 | return "r_paren" ; |
43 | case Kind::r_square: |
44 | return "r_square" ; |
45 | case Token::star: |
46 | return "star" ; |
47 | } |
48 | llvm_unreachable("Unknown token name" ); |
49 | } |
50 | |
// Returns true if `c` is an ASCII letter, i.e. in 'a'..'z' or 'A'..'Z'.
static bool IsLetter(char c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}
54 | |
// Returns true if `c` is an ASCII decimal digit, i.e. in '0'..'9'.
static bool IsDigit(char c) {
  return !(c < '0' || c > '9');
}
56 | |
57 | // A word starts with a letter, underscore, or dollar sign, followed by |
58 | // letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores. |
59 | static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr, |
60 | llvm::StringRef &remainder) { |
61 | // Find the longest prefix consisting of letters, digits, underscors and |
62 | // '$'. If it doesn't start with a digit, then it's a word. |
63 | llvm::StringRef candidate = remainder.take_while( |
64 | F: [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; }); |
65 | if (candidate.empty() || IsDigit(c: candidate[0])) |
66 | return std::nullopt; |
67 | remainder = remainder.drop_front(N: candidate.size()); |
68 | return candidate; |
69 | } |
70 | |
71 | static bool IsNumberBodyChar(char ch) { return IsDigit(c: ch) || IsLetter(c: ch); } |
72 | |
73 | static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr, |
74 | llvm::StringRef &remainder) { |
75 | if (IsDigit(c: remainder[0])) { |
76 | llvm::StringRef number = remainder.take_while(F: IsNumberBodyChar); |
77 | remainder = remainder.drop_front(N: number.size()); |
78 | return number; |
79 | } |
80 | return std::nullopt; |
81 | } |
82 | |
83 | llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) { |
84 | std::vector<Token> tokens; |
85 | llvm::StringRef remainder = expr; |
86 | do { |
87 | if (llvm::Expected<Token> t = Lex(expr, remainder)) { |
88 | tokens.push_back(x: std::move(*t)); |
89 | } else { |
90 | return t.takeError(); |
91 | } |
92 | } while (tokens.back().GetKind() != Token::eof); |
93 | return DILLexer(expr, std::move(tokens)); |
94 | } |
95 | |
96 | llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr, |
97 | llvm::StringRef &remainder) { |
98 | // Skip over whitespace (spaces). |
99 | remainder = remainder.ltrim(); |
100 | llvm::StringRef::iterator cur_pos = remainder.begin(); |
101 | |
102 | // Check to see if we've reached the end of our input string. |
103 | if (remainder.empty()) |
104 | return Token(Token::eof, "" , (uint32_t)expr.size()); |
105 | |
106 | uint32_t position = cur_pos - expr.begin(); |
107 | std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder); |
108 | if (maybe_number) |
109 | return Token(Token::numeric_constant, maybe_number->str(), position); |
110 | std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder); |
111 | if (maybe_word) |
112 | return Token(Token::identifier, maybe_word->str(), position); |
113 | |
114 | constexpr std::pair<Token::Kind, const char *> operators[] = { |
115 | {Token::amp, "&" }, {Token::arrow, "->" }, {Token::coloncolon, "::" }, |
116 | {Token::l_paren, "(" }, {Token::l_square, "[" }, {Token::period, "." }, |
117 | {Token::r_paren, ")" }, {Token::r_square, "]" }, {Token::star, "*" }, |
118 | }; |
119 | for (auto [kind, str] : operators) { |
120 | if (remainder.consume_front(Prefix: str)) |
121 | return Token(kind, str, position); |
122 | } |
123 | |
124 | // Unrecognized character(s) in string; unable to lex it. |
125 | return llvm::make_error<DILDiagnosticError>(Args&: expr, Args: "unrecognized token" , |
126 | Args&: position); |
127 | } |
128 | |
129 | } // namespace lldb_private::dil |
130 | |