1//===-- DILLexer.cpp ------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
// This implements the lexer (tokenizer) for the Data Inspection Language
// (DIL), and its helper functions, which will eventually underlie the
// 'frame variable' command. The language whose tokens this lexer recognizes
// is described in lldb/docs/dil-expr-lang.ebnf
11//
12//===----------------------------------------------------------------------===//
13
14#include "lldb/ValueObject/DILLexer.h"
15#include "lldb/Utility/Status.h"
16#include "lldb/ValueObject/DILParser.h"
17#include "llvm/ADT/StringSwitch.h"
18
19namespace lldb_private::dil {
20
21llvm::StringRef Token::GetTokenName(Kind kind) {
22 switch (kind) {
23 case Kind::amp:
24 return "amp";
25 case Kind::arrow:
26 return "arrow";
27 case Kind::coloncolon:
28 return "coloncolon";
29 case Kind::eof:
30 return "eof";
31 case Kind::identifier:
32 return "identifier";
33 case Kind::l_paren:
34 return "l_paren";
35 case Kind::l_square:
36 return "l_square";
37 case Kind::minus:
38 return "minus";
39 case Kind::numeric_constant:
40 return "numeric_constant";
41 case Kind::period:
42 return "period";
43 case Kind::r_paren:
44 return "r_paren";
45 case Kind::r_square:
46 return "r_square";
47 case Token::star:
48 return "star";
49 }
50 llvm_unreachable("Unknown token name");
51}
52
/// Returns true when \p c lies in 'a'..'z' or in 'A'..'Z'.
static bool IsLetter(char c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}
56
/// Returns true when \p c is a decimal digit, i.e. lies in '0'..'9'.
static bool IsDigit(char c) {
  const bool out_of_range = c < '0' || c > '9';
  return !out_of_range;
}
58
59// A word starts with a letter, underscore, or dollar sign, followed by
60// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
61static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
62 llvm::StringRef &remainder) {
63 // Find the longest prefix consisting of letters, digits, underscors and
64 // '$'. If it doesn't start with a digit, then it's a word.
65 llvm::StringRef candidate = remainder.take_while(
66 F: [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; });
67 if (candidate.empty() || IsDigit(c: candidate[0]))
68 return std::nullopt;
69 remainder = remainder.drop_front(N: candidate.size());
70 return candidate;
71}
72
73static bool IsNumberBodyChar(char ch) { return IsDigit(c: ch) || IsLetter(c: ch); }
74
75static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
76 llvm::StringRef &remainder) {
77 if (IsDigit(c: remainder[0])) {
78 llvm::StringRef number = remainder.take_while(F: IsNumberBodyChar);
79 remainder = remainder.drop_front(N: number.size());
80 return number;
81 }
82 return std::nullopt;
83}
84
85llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
86 std::vector<Token> tokens;
87 llvm::StringRef remainder = expr;
88 do {
89 if (llvm::Expected<Token> t = Lex(expr, remainder)) {
90 tokens.push_back(x: std::move(*t));
91 } else {
92 return t.takeError();
93 }
94 } while (tokens.back().GetKind() != Token::eof);
95 return DILLexer(expr, std::move(tokens));
96}
97
98llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
99 llvm::StringRef &remainder) {
100 // Skip over whitespace (spaces).
101 remainder = remainder.ltrim();
102 llvm::StringRef::iterator cur_pos = remainder.begin();
103
104 // Check to see if we've reached the end of our input string.
105 if (remainder.empty())
106 return Token(Token::eof, "", (uint32_t)expr.size());
107
108 uint32_t position = cur_pos - expr.begin();
109 std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
110 if (maybe_number)
111 return Token(Token::numeric_constant, maybe_number->str(), position);
112 std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
113 if (maybe_word)
114 return Token(Token::identifier, maybe_word->str(), position);
115
116 constexpr std::pair<Token::Kind, const char *> operators[] = {
117 {Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"},
118 {Token::l_paren, "("}, {Token::l_square, "["}, {Token::minus, "-"},
119 {Token::period, "."}, {Token::r_paren, ")"}, {Token::r_square, "]"},
120 {Token::star, "*"},
121 };
122 for (auto [kind, str] : operators) {
123 if (remainder.consume_front(Prefix: str))
124 return Token(kind, str, position);
125 }
126
127 // Unrecognized character(s) in string; unable to lex it.
128 return llvm::make_error<DILDiagnosticError>(Args&: expr, Args: "unrecognized token",
129 Args&: position);
130}
131
132} // namespace lldb_private::dil
133

// Source code of lldb/source/ValueObject/DILLexer.cpp