1//===-- DILLexer.cpp ------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
// This implements the lexer for the Data Inspection Language (DIL), and its
// helper functions, which will eventually underlie the 'frame variable'
// command. The language that this lexer tokenizes is described in
// lldb/docs/dil-expr-lang.ebnf
11//
12//===----------------------------------------------------------------------===//
13
14#include "lldb/ValueObject/DILLexer.h"
15#include "lldb/Utility/Status.h"
16#include "lldb/ValueObject/DILParser.h"
17#include "llvm/ADT/StringSwitch.h"
18
19namespace lldb_private::dil {
20
21llvm::StringRef Token::GetTokenName(Kind kind) {
22 switch (kind) {
23 case Kind::amp:
24 return "amp";
25 case Kind::arrow:
26 return "arrow";
27 case Kind::coloncolon:
28 return "coloncolon";
29 case Kind::eof:
30 return "eof";
31 case Kind::identifier:
32 return "identifier";
33 case Kind::l_paren:
34 return "l_paren";
35 case Kind::l_square:
36 return "l_square";
37 case Kind::numeric_constant:
38 return "numeric_constant";
39 case Kind::period:
40 return "period";
41 case Kind::r_paren:
42 return "r_paren";
43 case Kind::r_square:
44 return "r_square";
45 case Token::star:
46 return "star";
47 }
48 llvm_unreachable("Unknown token name");
49}
50
// Returns true when `c` lies in 'a'..'z' or 'A'..'Z'.
static bool IsLetter(char c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}
54
// Returns true when `c` is a decimal digit, i.e. lies in '0'..'9'.
static bool IsDigit(char c) {
  const bool below_zero = c < '0';
  const bool above_nine = c > '9';
  return !(below_zero || above_nine);
}
56
57// A word starts with a letter, underscore, or dollar sign, followed by
58// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
59static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
60 llvm::StringRef &remainder) {
61 // Find the longest prefix consisting of letters, digits, underscors and
62 // '$'. If it doesn't start with a digit, then it's a word.
63 llvm::StringRef candidate = remainder.take_while(
64 F: [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; });
65 if (candidate.empty() || IsDigit(c: candidate[0]))
66 return std::nullopt;
67 remainder = remainder.drop_front(N: candidate.size());
68 return candidate;
69}
70
71static bool IsNumberBodyChar(char ch) { return IsDigit(c: ch) || IsLetter(c: ch); }
72
73static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
74 llvm::StringRef &remainder) {
75 if (IsDigit(c: remainder[0])) {
76 llvm::StringRef number = remainder.take_while(F: IsNumberBodyChar);
77 remainder = remainder.drop_front(N: number.size());
78 return number;
79 }
80 return std::nullopt;
81}
82
83llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
84 std::vector<Token> tokens;
85 llvm::StringRef remainder = expr;
86 do {
87 if (llvm::Expected<Token> t = Lex(expr, remainder)) {
88 tokens.push_back(x: std::move(*t));
89 } else {
90 return t.takeError();
91 }
92 } while (tokens.back().GetKind() != Token::eof);
93 return DILLexer(expr, std::move(tokens));
94}
95
96llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
97 llvm::StringRef &remainder) {
98 // Skip over whitespace (spaces).
99 remainder = remainder.ltrim();
100 llvm::StringRef::iterator cur_pos = remainder.begin();
101
102 // Check to see if we've reached the end of our input string.
103 if (remainder.empty())
104 return Token(Token::eof, "", (uint32_t)expr.size());
105
106 uint32_t position = cur_pos - expr.begin();
107 std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
108 if (maybe_number)
109 return Token(Token::numeric_constant, maybe_number->str(), position);
110 std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
111 if (maybe_word)
112 return Token(Token::identifier, maybe_word->str(), position);
113
114 constexpr std::pair<Token::Kind, const char *> operators[] = {
115 {Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"},
116 {Token::l_paren, "("}, {Token::l_square, "["}, {Token::period, "."},
117 {Token::r_paren, ")"}, {Token::r_square, "]"}, {Token::star, "*"},
118 };
119 for (auto [kind, str] : operators) {
120 if (remainder.consume_front(Prefix: str))
121 return Token(kind, str, position);
122 }
123
124 // Unrecognized character(s) in string; unable to lex it.
125 return llvm::make_error<DILDiagnosticError>(Args&: expr, Args: "unrecognized token",
126 Args&: position);
127}
128
129} // namespace lldb_private::dil
130

source code of lldb/source/ValueObject/DILLexer.cpp