| 1 | //===-- DILLexer.cpp ------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | // This implements the lexer for the Data Inspection Language (DIL), and its |
| 8 | // helper functions, which will eventually underlie the 'frame variable' |
| 9 | // command. The language that this lexer recognizes is described in |
| 10 | // lldb/docs/dil-expr-lang.ebnf |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "lldb/ValueObject/DILLexer.h" |
| 15 | #include "lldb/Utility/Status.h" |
| 16 | #include "lldb/ValueObject/DILParser.h" |
| 17 | #include "llvm/ADT/StringSwitch.h" |
| 18 | |
| 19 | namespace lldb_private::dil { |
| 20 | |
| 21 | llvm::StringRef Token::GetTokenName(Kind kind) { |
| 22 | switch (kind) { |
| 23 | case Kind::amp: |
| 24 | return "amp" ; |
| 25 | case Kind::arrow: |
| 26 | return "arrow" ; |
| 27 | case Kind::coloncolon: |
| 28 | return "coloncolon" ; |
| 29 | case Kind::eof: |
| 30 | return "eof" ; |
| 31 | case Kind::identifier: |
| 32 | return "identifier" ; |
| 33 | case Kind::l_paren: |
| 34 | return "l_paren" ; |
| 35 | case Kind::l_square: |
| 36 | return "l_square" ; |
| 37 | case Kind::numeric_constant: |
| 38 | return "numeric_constant" ; |
| 39 | case Kind::period: |
| 40 | return "period" ; |
| 41 | case Kind::r_paren: |
| 42 | return "r_paren" ; |
| 43 | case Kind::r_square: |
| 44 | return "r_square" ; |
| 45 | case Token::star: |
| 46 | return "star" ; |
| 47 | } |
| 48 | llvm_unreachable("Unknown token name" ); |
| 49 | } |
| 50 | |
// Returns true if `c` lies within the ranges 'a'..'z' or 'A'..'Z'.
static bool IsLetter(char c) {
  const bool is_upper = c >= 'A' && c <= 'Z';
  const bool is_lower = c >= 'a' && c <= 'z';
  return is_lower || is_upper;
}
| 54 | |
// Returns true if `c` is one of the decimal digit characters '0'..'9'.
static bool IsDigit(char c) {
  if (c < '0')
    return false;
  return c <= '9';
}
| 56 | |
| 57 | // A word starts with a letter, underscore, or dollar sign, followed by |
| 58 | // letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores. |
| 59 | static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr, |
| 60 | llvm::StringRef &remainder) { |
| 61 | // Find the longest prefix consisting of letters, digits, underscors and |
| 62 | // '$'. If it doesn't start with a digit, then it's a word. |
| 63 | llvm::StringRef candidate = remainder.take_while( |
| 64 | F: [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; }); |
| 65 | if (candidate.empty() || IsDigit(c: candidate[0])) |
| 66 | return std::nullopt; |
| 67 | remainder = remainder.drop_front(N: candidate.size()); |
| 68 | return candidate; |
| 69 | } |
| 70 | |
| 71 | static bool IsNumberBodyChar(char ch) { return IsDigit(c: ch) || IsLetter(c: ch); } |
| 72 | |
| 73 | static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr, |
| 74 | llvm::StringRef &remainder) { |
| 75 | if (IsDigit(c: remainder[0])) { |
| 76 | llvm::StringRef number = remainder.take_while(F: IsNumberBodyChar); |
| 77 | remainder = remainder.drop_front(N: number.size()); |
| 78 | return number; |
| 79 | } |
| 80 | return std::nullopt; |
| 81 | } |
| 82 | |
| 83 | llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) { |
| 84 | std::vector<Token> tokens; |
| 85 | llvm::StringRef remainder = expr; |
| 86 | do { |
| 87 | if (llvm::Expected<Token> t = Lex(expr, remainder)) { |
| 88 | tokens.push_back(x: std::move(*t)); |
| 89 | } else { |
| 90 | return t.takeError(); |
| 91 | } |
| 92 | } while (tokens.back().GetKind() != Token::eof); |
| 93 | return DILLexer(expr, std::move(tokens)); |
| 94 | } |
| 95 | |
| 96 | llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr, |
| 97 | llvm::StringRef &remainder) { |
| 98 | // Skip over whitespace (spaces). |
| 99 | remainder = remainder.ltrim(); |
| 100 | llvm::StringRef::iterator cur_pos = remainder.begin(); |
| 101 | |
| 102 | // Check to see if we've reached the end of our input string. |
| 103 | if (remainder.empty()) |
| 104 | return Token(Token::eof, "" , (uint32_t)expr.size()); |
| 105 | |
| 106 | uint32_t position = cur_pos - expr.begin(); |
| 107 | std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder); |
| 108 | if (maybe_number) |
| 109 | return Token(Token::numeric_constant, maybe_number->str(), position); |
| 110 | std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder); |
| 111 | if (maybe_word) |
| 112 | return Token(Token::identifier, maybe_word->str(), position); |
| 113 | |
| 114 | constexpr std::pair<Token::Kind, const char *> operators[] = { |
| 115 | {Token::amp, "&" }, {Token::arrow, "->" }, {Token::coloncolon, "::" }, |
| 116 | {Token::l_paren, "(" }, {Token::l_square, "[" }, {Token::period, "." }, |
| 117 | {Token::r_paren, ")" }, {Token::r_square, "]" }, {Token::star, "*" }, |
| 118 | }; |
| 119 | for (auto [kind, str] : operators) { |
| 120 | if (remainder.consume_front(Prefix: str)) |
| 121 | return Token(kind, str, position); |
| 122 | } |
| 123 | |
| 124 | // Unrecognized character(s) in string; unable to lex it. |
| 125 | return llvm::make_error<DILDiagnosticError>(Args&: expr, Args: "unrecognized token" , |
| 126 | Args&: position); |
| 127 | } |
| 128 | |
| 129 | } // namespace lldb_private::dil |
| 130 | |