| 1 | //===-- DILParser.cpp -----------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | // This implements the recursive descent parser for the Data Inspection |
| 8 | // Language (DIL), and its helper functions, which will eventually underlie the |
| 9 | // 'frame variable' command. The language that this parser recognizes is |
| 10 | // described in lldb/docs/dil-expr-lang.ebnf |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "lldb/ValueObject/DILParser.h" |
| 15 | #include "lldb/Target/ExecutionContextScope.h" |
| 16 | #include "lldb/Utility/DiagnosticsRendering.h" |
| 17 | #include "lldb/ValueObject/DILAST.h" |
| 18 | #include "lldb/ValueObject/DILEval.h" |
| 19 | #include "llvm/ADT/StringRef.h" |
| 20 | #include "llvm/Support/FormatAdapters.h" |
| 21 | #include <cstdlib> |
| 22 | #include <limits.h> |
| 23 | #include <memory> |
| 24 | #include <sstream> |
| 25 | #include <string> |
| 26 | |
| 27 | namespace lldb_private::dil { |
| 28 | |
| 29 | DILDiagnosticError::DILDiagnosticError(llvm::StringRef expr, |
| 30 | const std::string &message, uint32_t loc, |
| 31 | uint16_t err_len) |
| 32 | : ErrorInfo(make_error_code(e: std::errc::invalid_argument)) { |
| 33 | DiagnosticDetail::SourceLocation sloc = { |
| 34 | .file: FileSpec{}, /*line=*/1, .column: static_cast<uint16_t>(loc + 1), |
| 35 | .length: err_len, .hidden: false, /*in_user_input=*/true}; |
| 36 | std::string rendered_msg = |
| 37 | llvm::formatv(Fmt: "<user expression 0>:1:{0}: {1}\n 1 | {2}\n | ^" , |
| 38 | Vals: loc + 1, Vals: message, Vals&: expr); |
| 39 | m_detail.source_location = sloc; |
| 40 | m_detail.severity = lldb::eSeverityError; |
| 41 | m_detail.message = message; |
| 42 | m_detail.rendered = std::move(rendered_msg); |
| 43 | } |
| 44 | |
| 45 | llvm::Expected<ASTNodeUP> |
| 46 | DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer, |
| 47 | std::shared_ptr<StackFrame> frame_sp, |
| 48 | lldb::DynamicValueType use_dynamic, bool use_synthetic, |
| 49 | bool fragile_ivar, bool check_ptr_vs_member) { |
| 50 | llvm::Error error = llvm::Error::success(); |
| 51 | DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic, |
| 52 | fragile_ivar, check_ptr_vs_member, error); |
| 53 | |
| 54 | ASTNodeUP node_up = parser.Run(); |
| 55 | |
| 56 | if (error) |
| 57 | return error; |
| 58 | |
| 59 | return node_up; |
| 60 | } |
| 61 | |
| 62 | DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer, |
| 63 | std::shared_ptr<StackFrame> frame_sp, |
| 64 | lldb::DynamicValueType use_dynamic, bool use_synthetic, |
| 65 | bool fragile_ivar, bool check_ptr_vs_member, |
| 66 | llvm::Error &error) |
| 67 | : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr), |
| 68 | m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic), |
| 69 | m_use_synthetic(use_synthetic), m_fragile_ivar(fragile_ivar), |
| 70 | m_check_ptr_vs_member(check_ptr_vs_member) {} |
| 71 | |
| 72 | ASTNodeUP DILParser::Run() { |
| 73 | ASTNodeUP expr = ParseExpression(); |
| 74 | |
| 75 | Expect(kind: Token::Kind::eof); |
| 76 | |
| 77 | return expr; |
| 78 | } |
| 79 | |
| 80 | // Parse an expression. |
| 81 | // |
| 82 | // expression: |
| 83 | // unary_expression |
| 84 | // |
| 85 | ASTNodeUP DILParser::ParseExpression() { return ParseUnaryExpression(); } |
| 86 | |
| 87 | // Parse an unary_expression. |
| 88 | // |
| 89 | // unary_expression: |
| 90 | // postfix_expression |
| 91 | // unary_operator expression |
| 92 | // |
| 93 | // unary_operator: |
| 94 | // "&" |
| 95 | // "*" |
| 96 | // |
| 97 | ASTNodeUP DILParser::ParseUnaryExpression() { |
| 98 | if (CurToken().IsOneOf(kinds: {Token::amp, Token::star})) { |
| 99 | Token token = CurToken(); |
| 100 | uint32_t loc = token.GetLocation(); |
| 101 | m_dil_lexer.Advance(); |
| 102 | auto rhs = ParseExpression(); |
| 103 | switch (token.GetKind()) { |
| 104 | case Token::star: |
| 105 | return std::make_unique<UnaryOpNode>(args&: loc, args: UnaryOpKind::Deref, |
| 106 | args: std::move(rhs)); |
| 107 | case Token::amp: |
| 108 | return std::make_unique<UnaryOpNode>(args&: loc, args: UnaryOpKind::AddrOf, |
| 109 | args: std::move(rhs)); |
| 110 | |
| 111 | default: |
| 112 | llvm_unreachable("invalid token kind" ); |
| 113 | } |
| 114 | } |
| 115 | return ParsePostfixExpression(); |
| 116 | } |
| 117 | |
| 118 | // Parse a postfix_expression. |
| 119 | // |
| 120 | // postfix_expression: |
| 121 | // primary_expression |
| 122 | // postfix_expression "[" integer_literal "]" |
| 123 | // postfix_expression "." id_expression |
| 124 | // postfix_expression "->" id_expression |
| 125 | // |
| 126 | ASTNodeUP DILParser::ParsePostfixExpression() { |
| 127 | ASTNodeUP lhs = ParsePrimaryExpression(); |
| 128 | while (CurToken().IsOneOf(kinds: {Token::l_square, Token::period, Token::arrow})) { |
| 129 | uint32_t loc = CurToken().GetLocation(); |
| 130 | Token token = CurToken(); |
| 131 | switch (token.GetKind()) { |
| 132 | case Token::l_square: { |
| 133 | m_dil_lexer.Advance(); |
| 134 | std::optional<int64_t> rhs = ParseIntegerConstant(); |
| 135 | if (!rhs) { |
| 136 | BailOut( |
| 137 | error: llvm::formatv(Fmt: "failed to parse integer constant: {0}" , Vals: CurToken()), |
| 138 | loc: CurToken().GetLocation(), err_len: CurToken().GetSpelling().length()); |
| 139 | return std::make_unique<ErrorNode>(); |
| 140 | } |
| 141 | Expect(kind: Token::r_square); |
| 142 | m_dil_lexer.Advance(); |
| 143 | lhs = std::make_unique<ArraySubscriptNode>(args&: loc, args: std::move(lhs), |
| 144 | args: std::move(*rhs)); |
| 145 | break; |
| 146 | } |
| 147 | case Token::period: |
| 148 | case Token::arrow: { |
| 149 | m_dil_lexer.Advance(); |
| 150 | Token member_token = CurToken(); |
| 151 | std::string member_id = ParseIdExpression(); |
| 152 | lhs = std::make_unique<MemberOfNode>( |
| 153 | args: member_token.GetLocation(), args: std::move(lhs), |
| 154 | args: token.GetKind() == Token::arrow, args&: member_id); |
| 155 | break; |
| 156 | } |
| 157 | default: |
| 158 | llvm_unreachable("invalid token" ); |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | return lhs; |
| 163 | } |
| 164 | |
| 165 | // Parse a primary_expression. |
| 166 | // |
| 167 | // primary_expression: |
| 168 | // id_expression |
| 169 | // "(" expression ")" |
| 170 | // |
| 171 | ASTNodeUP DILParser::ParsePrimaryExpression() { |
| 172 | if (CurToken().IsOneOf(kinds: {Token::coloncolon, Token::identifier})) { |
| 173 | // Save the source location for the diagnostics message. |
| 174 | uint32_t loc = CurToken().GetLocation(); |
| 175 | auto identifier = ParseIdExpression(); |
| 176 | |
| 177 | return std::make_unique<IdentifierNode>(args&: loc, args&: identifier); |
| 178 | } |
| 179 | |
| 180 | if (CurToken().Is(kind: Token::l_paren)) { |
| 181 | m_dil_lexer.Advance(); |
| 182 | auto expr = ParseExpression(); |
| 183 | Expect(kind: Token::r_paren); |
| 184 | m_dil_lexer.Advance(); |
| 185 | return expr; |
| 186 | } |
| 187 | |
| 188 | BailOut(error: llvm::formatv(Fmt: "Unexpected token: {0}" , Vals: CurToken()), |
| 189 | loc: CurToken().GetLocation(), err_len: CurToken().GetSpelling().length()); |
| 190 | return std::make_unique<ErrorNode>(); |
| 191 | } |
| 192 | |
| 193 | // Parse nested_name_specifier. |
| 194 | // |
| 195 | // nested_name_specifier: |
| 196 | // type_name "::" |
| 197 | // namespace_name "::" |
| 198 | // nested_name_specifier identifier "::" |
| 199 | // |
| 200 | std::string DILParser::ParseNestedNameSpecifier() { |
| 201 | // The first token in nested_name_specifier is always an identifier, or |
| 202 | // '(anonymous namespace)'. |
| 203 | switch (CurToken().GetKind()) { |
| 204 | case Token::l_paren: { |
| 205 | // Anonymous namespaces need to be treated specially: They are |
| 206 | // represented the the string '(anonymous namespace)', which has a |
| 207 | // space in it (throwing off normal parsing) and is not actually |
| 208 | // proper C++> Check to see if we're looking at |
| 209 | // '(anonymous namespace)::...' |
| 210 | |
| 211 | // Look for all the pieces, in order: |
| 212 | // l_paren 'anonymous' 'namespace' r_paren coloncolon |
| 213 | if (m_dil_lexer.LookAhead(N: 1).Is(kind: Token::identifier) && |
| 214 | (m_dil_lexer.LookAhead(N: 1).GetSpelling() == "anonymous" ) && |
| 215 | m_dil_lexer.LookAhead(N: 2).Is(kind: Token::identifier) && |
| 216 | (m_dil_lexer.LookAhead(N: 2).GetSpelling() == "namespace" ) && |
| 217 | m_dil_lexer.LookAhead(N: 3).Is(kind: Token::r_paren) && |
| 218 | m_dil_lexer.LookAhead(N: 4).Is(kind: Token::coloncolon)) { |
| 219 | m_dil_lexer.Advance(N: 4); |
| 220 | |
| 221 | assert( |
| 222 | (CurToken().Is(Token::identifier) || CurToken().Is(Token::l_paren)) && |
| 223 | "Expected an identifier or anonymous namespace, but not found." ); |
| 224 | // Continue parsing the nested_namespace_specifier. |
| 225 | std::string identifier2 = ParseNestedNameSpecifier(); |
| 226 | if (identifier2.empty()) { |
| 227 | Expect(kind: Token::identifier); |
| 228 | identifier2 = CurToken().GetSpelling(); |
| 229 | m_dil_lexer.Advance(); |
| 230 | } |
| 231 | return "(anonymous namespace)::" + identifier2; |
| 232 | } |
| 233 | |
| 234 | return "" ; |
| 235 | } // end of special handling for '(anonymous namespace)' |
| 236 | case Token::identifier: { |
| 237 | // If the next token is scope ("::"), then this is indeed a |
| 238 | // nested_name_specifier |
| 239 | if (m_dil_lexer.LookAhead(N: 1).Is(kind: Token::coloncolon)) { |
| 240 | // This nested_name_specifier is a single identifier. |
| 241 | std::string identifier = CurToken().GetSpelling(); |
| 242 | m_dil_lexer.Advance(N: 1); |
| 243 | Expect(kind: Token::coloncolon); |
| 244 | m_dil_lexer.Advance(); |
| 245 | // Continue parsing the nested_name_specifier. |
| 246 | return identifier + "::" + ParseNestedNameSpecifier(); |
| 247 | } |
| 248 | |
| 249 | return "" ; |
| 250 | } |
| 251 | default: |
| 252 | return "" ; |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | // Parse an id_expression. |
| 257 | // |
| 258 | // id_expression: |
| 259 | // unqualified_id |
| 260 | // qualified_id |
| 261 | // |
| 262 | // qualified_id: |
| 263 | // ["::"] [nested_name_specifier] unqualified_id |
| 264 | // ["::"] identifier |
| 265 | // |
| 266 | // identifier: |
| 267 | // ? Token::identifier ? |
| 268 | // |
| 269 | std::string DILParser::ParseIdExpression() { |
| 270 | // Try parsing optional global scope operator. |
| 271 | bool global_scope = false; |
| 272 | if (CurToken().Is(kind: Token::coloncolon)) { |
| 273 | global_scope = true; |
| 274 | m_dil_lexer.Advance(); |
| 275 | } |
| 276 | |
| 277 | // Try parsing optional nested_name_specifier. |
| 278 | std::string nested_name_specifier = ParseNestedNameSpecifier(); |
| 279 | |
| 280 | // If nested_name_specifier is present, then it's qualified_id production. |
| 281 | // Follow the first production rule. |
| 282 | if (!nested_name_specifier.empty()) { |
| 283 | // Parse unqualified_id and construct a fully qualified id expression. |
| 284 | auto unqualified_id = ParseUnqualifiedId(); |
| 285 | |
| 286 | return llvm::formatv(Fmt: "{0}{1}{2}" , Vals: global_scope ? "::" : "" , |
| 287 | Vals&: nested_name_specifier, Vals&: unqualified_id); |
| 288 | } |
| 289 | |
| 290 | // No nested_name_specifier, but with global scope -- this is also a |
| 291 | // qualified_id production. Follow the second production rule. |
| 292 | if (global_scope) { |
| 293 | Expect(kind: Token::identifier); |
| 294 | std::string identifier = CurToken().GetSpelling(); |
| 295 | m_dil_lexer.Advance(); |
| 296 | return llvm::formatv(Fmt: "{0}{1}" , Vals: global_scope ? "::" : "" , Vals&: identifier); |
| 297 | } |
| 298 | |
| 299 | // This is unqualified_id production. |
| 300 | return ParseUnqualifiedId(); |
| 301 | } |
| 302 | |
| 303 | // Parse an unqualified_id. |
| 304 | // |
| 305 | // unqualified_id: |
| 306 | // identifier |
| 307 | // |
| 308 | // identifier: |
| 309 | // ? Token::identifier ? |
| 310 | // |
| 311 | std::string DILParser::ParseUnqualifiedId() { |
| 312 | Expect(kind: Token::identifier); |
| 313 | std::string identifier = CurToken().GetSpelling(); |
| 314 | m_dil_lexer.Advance(); |
| 315 | return identifier; |
| 316 | } |
| 317 | |
| 318 | void DILParser::BailOut(const std::string &error, uint32_t loc, |
| 319 | uint16_t err_len) { |
| 320 | if (m_error) |
| 321 | // If error is already set, then the parser is in the "bail-out" mode. Don't |
| 322 | // do anything and keep the original error. |
| 323 | return; |
| 324 | |
| 325 | m_error = |
| 326 | llvm::make_error<DILDiagnosticError>(Args&: m_input_expr, Args: error, Args&: loc, Args&: err_len); |
| 327 | // Advance the lexer token index to the end of the lexed tokens vector. |
| 328 | m_dil_lexer.ResetTokenIdx(new_value: m_dil_lexer.NumLexedTokens() - 1); |
| 329 | } |
| 330 | |
| 331 | // Parse a integer_literal. |
| 332 | // |
| 333 | // integer_literal: |
| 334 | // ? Integer constant ? |
| 335 | // |
| 336 | std::optional<int64_t> DILParser::ParseIntegerConstant() { |
| 337 | auto spelling = CurToken().GetSpelling(); |
| 338 | llvm::StringRef spelling_ref = spelling; |
| 339 | int64_t raw_value; |
| 340 | if (!spelling_ref.getAsInteger<int64_t>(Radix: 0, Result&: raw_value)) { |
| 341 | m_dil_lexer.Advance(); |
| 342 | return raw_value; |
| 343 | } |
| 344 | |
| 345 | return std::nullopt; |
| 346 | } |
| 347 | |
| 348 | void DILParser::Expect(Token::Kind kind) { |
| 349 | if (CurToken().IsNot(kind)) { |
| 350 | BailOut(error: llvm::formatv(Fmt: "expected {0}, got: {1}" , Vals&: kind, Vals: CurToken()), |
| 351 | loc: CurToken().GetLocation(), err_len: CurToken().GetSpelling().length()); |
| 352 | } |
| 353 | } |
| 354 | |
| 355 | } // namespace lldb_private::dil |
| 356 | |