1 | //===-- DILParser.cpp -----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | // This implements the recursive descent parser for the Data Inspection |
8 | // Language (DIL), and its helper functions, which will eventually underlie the |
9 | // 'frame variable' command. The language that this parser recognizes is |
10 | // described in lldb/docs/dil-expr-lang.ebnf |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "lldb/ValueObject/DILParser.h" |
15 | #include "lldb/Target/ExecutionContextScope.h" |
16 | #include "lldb/Utility/DiagnosticsRendering.h" |
17 | #include "lldb/ValueObject/DILAST.h" |
18 | #include "lldb/ValueObject/DILEval.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include "llvm/Support/FormatAdapters.h" |
21 | #include <cstdlib> |
22 | #include <limits.h> |
23 | #include <memory> |
24 | #include <sstream> |
25 | #include <string> |
26 | |
27 | namespace lldb_private::dil { |
28 | |
29 | DILDiagnosticError::DILDiagnosticError(llvm::StringRef expr, |
30 | const std::string &message, uint32_t loc, |
31 | uint16_t err_len) |
32 | : ErrorInfo(make_error_code(e: std::errc::invalid_argument)) { |
33 | DiagnosticDetail::SourceLocation sloc = { |
34 | .file: FileSpec{}, /*line=*/1, .column: static_cast<uint16_t>(loc + 1), |
35 | .length: err_len, .hidden: false, /*in_user_input=*/true}; |
36 | std::string rendered_msg = |
37 | llvm::formatv(Fmt: "<user expression 0>:1:{0}: {1}\n 1 | {2}\n | ^" , |
38 | Vals: loc + 1, Vals: message, Vals&: expr); |
39 | m_detail.source_location = sloc; |
40 | m_detail.severity = lldb::eSeverityError; |
41 | m_detail.message = message; |
42 | m_detail.rendered = std::move(rendered_msg); |
43 | } |
44 | |
45 | llvm::Expected<ASTNodeUP> |
46 | DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer, |
47 | std::shared_ptr<StackFrame> frame_sp, |
48 | lldb::DynamicValueType use_dynamic, bool use_synthetic, |
49 | bool fragile_ivar, bool check_ptr_vs_member) { |
50 | llvm::Error error = llvm::Error::success(); |
51 | DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic, |
52 | fragile_ivar, check_ptr_vs_member, error); |
53 | |
54 | ASTNodeUP node_up = parser.Run(); |
55 | |
56 | if (error) |
57 | return error; |
58 | |
59 | return node_up; |
60 | } |
61 | |
62 | DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer, |
63 | std::shared_ptr<StackFrame> frame_sp, |
64 | lldb::DynamicValueType use_dynamic, bool use_synthetic, |
65 | bool fragile_ivar, bool check_ptr_vs_member, |
66 | llvm::Error &error) |
67 | : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr), |
68 | m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic), |
69 | m_use_synthetic(use_synthetic), m_fragile_ivar(fragile_ivar), |
70 | m_check_ptr_vs_member(check_ptr_vs_member) {} |
71 | |
72 | ASTNodeUP DILParser::Run() { |
73 | ASTNodeUP expr = ParseExpression(); |
74 | |
75 | Expect(kind: Token::Kind::eof); |
76 | |
77 | return expr; |
78 | } |
79 | |
80 | // Parse an expression. |
81 | // |
82 | // expression: |
83 | // unary_expression |
84 | // |
85 | ASTNodeUP DILParser::ParseExpression() { return ParseUnaryExpression(); } |
86 | |
87 | // Parse an unary_expression. |
88 | // |
89 | // unary_expression: |
90 | // postfix_expression |
91 | // unary_operator expression |
92 | // |
93 | // unary_operator: |
94 | // "&" |
95 | // "*" |
96 | // |
97 | ASTNodeUP DILParser::ParseUnaryExpression() { |
98 | if (CurToken().IsOneOf(kinds: {Token::amp, Token::star})) { |
99 | Token token = CurToken(); |
100 | uint32_t loc = token.GetLocation(); |
101 | m_dil_lexer.Advance(); |
102 | auto rhs = ParseExpression(); |
103 | switch (token.GetKind()) { |
104 | case Token::star: |
105 | return std::make_unique<UnaryOpNode>(args&: loc, args: UnaryOpKind::Deref, |
106 | args: std::move(rhs)); |
107 | case Token::amp: |
108 | return std::make_unique<UnaryOpNode>(args&: loc, args: UnaryOpKind::AddrOf, |
109 | args: std::move(rhs)); |
110 | |
111 | default: |
112 | llvm_unreachable("invalid token kind" ); |
113 | } |
114 | } |
115 | return ParsePostfixExpression(); |
116 | } |
117 | |
118 | // Parse a postfix_expression. |
119 | // |
120 | // postfix_expression: |
121 | // primary_expression |
122 | // postfix_expression "[" integer_literal "]" |
123 | // postfix_expression "." id_expression |
124 | // postfix_expression "->" id_expression |
125 | // |
126 | ASTNodeUP DILParser::ParsePostfixExpression() { |
127 | ASTNodeUP lhs = ParsePrimaryExpression(); |
128 | while (CurToken().IsOneOf(kinds: {Token::l_square, Token::period, Token::arrow})) { |
129 | uint32_t loc = CurToken().GetLocation(); |
130 | Token token = CurToken(); |
131 | switch (token.GetKind()) { |
132 | case Token::l_square: { |
133 | m_dil_lexer.Advance(); |
134 | std::optional<int64_t> rhs = ParseIntegerConstant(); |
135 | if (!rhs) { |
136 | BailOut( |
137 | error: llvm::formatv(Fmt: "failed to parse integer constant: {0}" , Vals: CurToken()), |
138 | loc: CurToken().GetLocation(), err_len: CurToken().GetSpelling().length()); |
139 | return std::make_unique<ErrorNode>(); |
140 | } |
141 | Expect(kind: Token::r_square); |
142 | m_dil_lexer.Advance(); |
143 | lhs = std::make_unique<ArraySubscriptNode>(args&: loc, args: std::move(lhs), |
144 | args: std::move(*rhs)); |
145 | break; |
146 | } |
147 | case Token::period: |
148 | case Token::arrow: { |
149 | m_dil_lexer.Advance(); |
150 | Token member_token = CurToken(); |
151 | std::string member_id = ParseIdExpression(); |
152 | lhs = std::make_unique<MemberOfNode>( |
153 | args: member_token.GetLocation(), args: std::move(lhs), |
154 | args: token.GetKind() == Token::arrow, args&: member_id); |
155 | break; |
156 | } |
157 | default: |
158 | llvm_unreachable("invalid token" ); |
159 | } |
160 | } |
161 | |
162 | return lhs; |
163 | } |
164 | |
165 | // Parse a primary_expression. |
166 | // |
167 | // primary_expression: |
168 | // id_expression |
169 | // "(" expression ")" |
170 | // |
171 | ASTNodeUP DILParser::ParsePrimaryExpression() { |
172 | if (CurToken().IsOneOf(kinds: {Token::coloncolon, Token::identifier})) { |
173 | // Save the source location for the diagnostics message. |
174 | uint32_t loc = CurToken().GetLocation(); |
175 | auto identifier = ParseIdExpression(); |
176 | |
177 | return std::make_unique<IdentifierNode>(args&: loc, args&: identifier); |
178 | } |
179 | |
180 | if (CurToken().Is(kind: Token::l_paren)) { |
181 | m_dil_lexer.Advance(); |
182 | auto expr = ParseExpression(); |
183 | Expect(kind: Token::r_paren); |
184 | m_dil_lexer.Advance(); |
185 | return expr; |
186 | } |
187 | |
188 | BailOut(error: llvm::formatv(Fmt: "Unexpected token: {0}" , Vals: CurToken()), |
189 | loc: CurToken().GetLocation(), err_len: CurToken().GetSpelling().length()); |
190 | return std::make_unique<ErrorNode>(); |
191 | } |
192 | |
193 | // Parse nested_name_specifier. |
194 | // |
195 | // nested_name_specifier: |
196 | // type_name "::" |
197 | // namespace_name "::" |
198 | // nested_name_specifier identifier "::" |
199 | // |
200 | std::string DILParser::ParseNestedNameSpecifier() { |
201 | // The first token in nested_name_specifier is always an identifier, or |
202 | // '(anonymous namespace)'. |
203 | switch (CurToken().GetKind()) { |
204 | case Token::l_paren: { |
205 | // Anonymous namespaces need to be treated specially: They are |
206 | // represented the the string '(anonymous namespace)', which has a |
207 | // space in it (throwing off normal parsing) and is not actually |
208 | // proper C++> Check to see if we're looking at |
209 | // '(anonymous namespace)::...' |
210 | |
211 | // Look for all the pieces, in order: |
212 | // l_paren 'anonymous' 'namespace' r_paren coloncolon |
213 | if (m_dil_lexer.LookAhead(N: 1).Is(kind: Token::identifier) && |
214 | (m_dil_lexer.LookAhead(N: 1).GetSpelling() == "anonymous" ) && |
215 | m_dil_lexer.LookAhead(N: 2).Is(kind: Token::identifier) && |
216 | (m_dil_lexer.LookAhead(N: 2).GetSpelling() == "namespace" ) && |
217 | m_dil_lexer.LookAhead(N: 3).Is(kind: Token::r_paren) && |
218 | m_dil_lexer.LookAhead(N: 4).Is(kind: Token::coloncolon)) { |
219 | m_dil_lexer.Advance(N: 4); |
220 | |
221 | assert( |
222 | (CurToken().Is(Token::identifier) || CurToken().Is(Token::l_paren)) && |
223 | "Expected an identifier or anonymous namespace, but not found." ); |
224 | // Continue parsing the nested_namespace_specifier. |
225 | std::string identifier2 = ParseNestedNameSpecifier(); |
226 | if (identifier2.empty()) { |
227 | Expect(kind: Token::identifier); |
228 | identifier2 = CurToken().GetSpelling(); |
229 | m_dil_lexer.Advance(); |
230 | } |
231 | return "(anonymous namespace)::" + identifier2; |
232 | } |
233 | |
234 | return "" ; |
235 | } // end of special handling for '(anonymous namespace)' |
236 | case Token::identifier: { |
237 | // If the next token is scope ("::"), then this is indeed a |
238 | // nested_name_specifier |
239 | if (m_dil_lexer.LookAhead(N: 1).Is(kind: Token::coloncolon)) { |
240 | // This nested_name_specifier is a single identifier. |
241 | std::string identifier = CurToken().GetSpelling(); |
242 | m_dil_lexer.Advance(N: 1); |
243 | Expect(kind: Token::coloncolon); |
244 | m_dil_lexer.Advance(); |
245 | // Continue parsing the nested_name_specifier. |
246 | return identifier + "::" + ParseNestedNameSpecifier(); |
247 | } |
248 | |
249 | return "" ; |
250 | } |
251 | default: |
252 | return "" ; |
253 | } |
254 | } |
255 | |
256 | // Parse an id_expression. |
257 | // |
258 | // id_expression: |
259 | // unqualified_id |
260 | // qualified_id |
261 | // |
262 | // qualified_id: |
263 | // ["::"] [nested_name_specifier] unqualified_id |
264 | // ["::"] identifier |
265 | // |
266 | // identifier: |
267 | // ? Token::identifier ? |
268 | // |
269 | std::string DILParser::ParseIdExpression() { |
270 | // Try parsing optional global scope operator. |
271 | bool global_scope = false; |
272 | if (CurToken().Is(kind: Token::coloncolon)) { |
273 | global_scope = true; |
274 | m_dil_lexer.Advance(); |
275 | } |
276 | |
277 | // Try parsing optional nested_name_specifier. |
278 | std::string nested_name_specifier = ParseNestedNameSpecifier(); |
279 | |
280 | // If nested_name_specifier is present, then it's qualified_id production. |
281 | // Follow the first production rule. |
282 | if (!nested_name_specifier.empty()) { |
283 | // Parse unqualified_id and construct a fully qualified id expression. |
284 | auto unqualified_id = ParseUnqualifiedId(); |
285 | |
286 | return llvm::formatv(Fmt: "{0}{1}{2}" , Vals: global_scope ? "::" : "" , |
287 | Vals&: nested_name_specifier, Vals&: unqualified_id); |
288 | } |
289 | |
290 | // No nested_name_specifier, but with global scope -- this is also a |
291 | // qualified_id production. Follow the second production rule. |
292 | if (global_scope) { |
293 | Expect(kind: Token::identifier); |
294 | std::string identifier = CurToken().GetSpelling(); |
295 | m_dil_lexer.Advance(); |
296 | return llvm::formatv(Fmt: "{0}{1}" , Vals: global_scope ? "::" : "" , Vals&: identifier); |
297 | } |
298 | |
299 | // This is unqualified_id production. |
300 | return ParseUnqualifiedId(); |
301 | } |
302 | |
303 | // Parse an unqualified_id. |
304 | // |
305 | // unqualified_id: |
306 | // identifier |
307 | // |
308 | // identifier: |
309 | // ? Token::identifier ? |
310 | // |
311 | std::string DILParser::ParseUnqualifiedId() { |
312 | Expect(kind: Token::identifier); |
313 | std::string identifier = CurToken().GetSpelling(); |
314 | m_dil_lexer.Advance(); |
315 | return identifier; |
316 | } |
317 | |
318 | void DILParser::BailOut(const std::string &error, uint32_t loc, |
319 | uint16_t err_len) { |
320 | if (m_error) |
321 | // If error is already set, then the parser is in the "bail-out" mode. Don't |
322 | // do anything and keep the original error. |
323 | return; |
324 | |
325 | m_error = |
326 | llvm::make_error<DILDiagnosticError>(Args&: m_input_expr, Args: error, Args&: loc, Args&: err_len); |
327 | // Advance the lexer token index to the end of the lexed tokens vector. |
328 | m_dil_lexer.ResetTokenIdx(new_value: m_dil_lexer.NumLexedTokens() - 1); |
329 | } |
330 | |
331 | // Parse a integer_literal. |
332 | // |
333 | // integer_literal: |
334 | // ? Integer constant ? |
335 | // |
336 | std::optional<int64_t> DILParser::ParseIntegerConstant() { |
337 | auto spelling = CurToken().GetSpelling(); |
338 | llvm::StringRef spelling_ref = spelling; |
339 | int64_t raw_value; |
340 | if (!spelling_ref.getAsInteger<int64_t>(Radix: 0, Result&: raw_value)) { |
341 | m_dil_lexer.Advance(); |
342 | return raw_value; |
343 | } |
344 | |
345 | return std::nullopt; |
346 | } |
347 | |
348 | void DILParser::Expect(Token::Kind kind) { |
349 | if (CurToken().IsNot(kind)) { |
350 | BailOut(error: llvm::formatv(Fmt: "expected {0}, got: {1}" , Vals&: kind, Vals: CurToken()), |
351 | loc: CurToken().GetLocation(), err_len: CurToken().GetSpelling().length()); |
352 | } |
353 | } |
354 | |
355 | } // namespace lldb_private::dil |
356 | |