1 | //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/Transformer/Parsing.h" |
10 | #include "clang/Basic/CharInfo.h" |
11 | #include "clang/Tooling/Transformer/RangeSelector.h" |
12 | #include "llvm/ADT/StringMap.h" |
13 | #include "llvm/ADT/StringRef.h" |
14 | #include "llvm/Support/Error.h" |
15 | #include <optional> |
16 | #include <string> |
17 | #include <utility> |
18 | |
19 | using namespace clang; |
20 | using namespace transformer; |
21 | |
22 | // FIXME: This implementation is entirely separate from that of the AST |
23 | // matchers. Given the similarity of the languages and uses of the two parsers, |
24 | // the two should share a common parsing infrastructure, as should other |
25 | // Transformer types. We intend to unify this implementation soon to share as |
26 | // much as possible with the AST Matchers parsing. |
27 | |
28 | namespace { |
29 | using llvm::Expected; |
30 | |
31 | template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); |
32 | |
33 | struct ParseState { |
34 | // The remaining input to be processed. |
35 | StringRef Input; |
36 | // The original input. Not modified during parsing; only for reference in |
37 | // error reporting. |
38 | StringRef OriginalInput; |
39 | }; |
40 | |
41 | // Represents an intermediate result returned by a parsing function. Functions |
42 | // that don't generate values should use `std::nullopt` |
43 | template <typename ResultType> struct ParseProgress { |
44 | ParseState State; |
45 | // Intermediate result generated by the Parser. |
46 | ResultType Value; |
47 | }; |
48 | |
49 | template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; |
50 | template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); |
51 | |
52 | class ParseError : public llvm::ErrorInfo<ParseError> { |
53 | public: |
54 | // Required field for all ErrorInfo derivatives. |
55 | static char ID; |
56 | |
57 | ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) |
58 | : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), |
59 | Excerpt(std::move(InputExcerpt)) {} |
60 | |
61 | void log(llvm::raw_ostream &OS) const override { |
62 | OS << "parse error at position (" << Pos << "): " << ErrorMsg |
63 | << ": " + Excerpt; |
64 | } |
65 | |
66 | std::error_code convertToErrorCode() const override { |
67 | return llvm::inconvertibleErrorCode(); |
68 | } |
69 | |
70 | // Position of the error in the input string. |
71 | size_t Pos; |
72 | std::string ErrorMsg; |
73 | // Excerpt of the input starting at the error position. |
74 | std::string Excerpt; |
75 | }; |
76 | |
77 | char ParseError::ID; |
78 | } // namespace |
79 | |
80 | static const llvm::StringMap<RangeSelectorOp<std::string>> & |
81 | getUnaryStringSelectors() { |
82 | static const llvm::StringMap<RangeSelectorOp<std::string>> M = { |
83 | {"name" , name}, |
84 | {"node" , node}, |
85 | {"statement" , statement}, |
86 | {"statements" , statements}, |
87 | {"member" , member}, |
88 | {"callArgs" , callArgs}, |
89 | {"elseBranch" , elseBranch}, |
90 | {"initListElements" , initListElements}}; |
91 | return M; |
92 | } |
93 | |
94 | static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & |
95 | getUnaryRangeSelectors() { |
96 | static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { |
97 | {"before" , before}, {"after" , after}, {"expansion" , expansion}}; |
98 | return M; |
99 | } |
100 | |
101 | static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & |
102 | getBinaryStringSelectors() { |
103 | static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { |
104 | {"encloseNodes" , encloseNodes}}; |
105 | return M; |
106 | } |
107 | |
108 | static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & |
109 | getBinaryRangeSelectors() { |
110 | static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> |
111 | M = {{"enclose" , enclose}, {"between" , between}}; |
112 | return M; |
113 | } |
114 | |
115 | template <typename Element> |
116 | std::optional<Element> findOptional(const llvm::StringMap<Element> &Map, |
117 | llvm::StringRef Key) { |
118 | auto it = Map.find(Key); |
119 | if (it == Map.end()) |
120 | return std::nullopt; |
121 | return it->second; |
122 | } |
123 | |
124 | template <typename ResultType> |
125 | ParseProgress<ResultType> makeParseProgress(ParseState State, |
126 | ResultType Result) { |
127 | return ParseProgress<ResultType>{State, std::move(Result)}; |
128 | } |
129 | |
130 | static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { |
131 | size_t Pos = S.OriginalInput.size() - S.Input.size(); |
132 | return llvm::make_error<ParseError>(Args&: Pos, Args: std::move(ErrorMsg), |
133 | Args: S.OriginalInput.substr(Start: Pos, N: 20).str()); |
134 | } |
135 | |
136 | // Returns a new ParseState that advances \c S by \c N characters. |
137 | static ParseState advance(ParseState S, size_t N) { |
138 | S.Input = S.Input.drop_front(N); |
139 | return S; |
140 | } |
141 | |
142 | static StringRef consumeWhitespace(StringRef S) { |
143 | return S.drop_while(F: [](char c) { return isASCII(c) && isWhitespace(c); }); |
144 | } |
145 | |
146 | // Parses a single expected character \c c from \c State, skipping preceding |
147 | // whitespace. Error if the expected character isn't found. |
148 | static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) { |
149 | State.Input = consumeWhitespace(S: State.Input); |
150 | if (State.Input.empty() || State.Input.front() != c) |
151 | return makeParseError(S: State, |
152 | ErrorMsg: ("expected char not found: " + llvm::Twine(c)).str()); |
153 | return makeParseProgress(State: advance(S: State, N: 1), Result: std::nullopt); |
154 | } |
155 | |
156 | // Parses an identitifer "token" -- handles preceding whitespace. |
157 | static ExpectedProgress<std::string> parseId(ParseState State) { |
158 | State.Input = consumeWhitespace(S: State.Input); |
159 | auto Id = State.Input.take_while( |
160 | F: [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); }); |
161 | if (Id.empty()) |
162 | return makeParseError(S: State, ErrorMsg: "failed to parse name" ); |
163 | return makeParseProgress(State: advance(S: State, N: Id.size()), Result: Id.str()); |
164 | } |
165 | |
166 | // For consistency with the AST matcher parser and C++ code, node ids are |
167 | // written as strings. However, we do not support escaping in the string. |
168 | static ExpectedProgress<std::string> parseStringId(ParseState State) { |
169 | State.Input = consumeWhitespace(S: State.Input); |
170 | if (State.Input.empty()) |
171 | return makeParseError(S: State, ErrorMsg: "unexpected end of input" ); |
172 | if (!State.Input.consume_front(Prefix: "\"" )) |
173 | return makeParseError( |
174 | S: State, |
175 | ErrorMsg: "expecting string, but encountered other character or end of input" ); |
176 | |
177 | StringRef Id = State.Input.take_until(F: [](char c) { return c == '"'; }); |
178 | if (State.Input.size() == Id.size()) |
179 | return makeParseError(S: State, ErrorMsg: "unterminated string" ); |
180 | // Advance past the trailing quote as well. |
181 | return makeParseProgress(State: advance(S: State, N: Id.size() + 1), Result: Id.str()); |
182 | } |
183 | |
184 | // Parses a single element surrounded by parens. `Op` is applied to the parsed |
185 | // result to create the result of this function call. |
186 | template <typename T> |
187 | ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, |
188 | RangeSelectorOp<T> Op, |
189 | ParseState State) { |
190 | auto P = parseChar(c: '(', State); |
191 | if (!P) |
192 | return P.takeError(); |
193 | |
194 | auto E = ParseElement(P->State); |
195 | if (!E) |
196 | return E.takeError(); |
197 | |
198 | P = parseChar(')', E->State); |
199 | if (!P) |
200 | return P.takeError(); |
201 | |
202 | return makeParseProgress(P->State, Op(std::move(E->Value))); |
203 | } |
204 | |
205 | // Parses a pair of elements surrounded by parens and separated by comma. `Op` |
206 | // is applied to the parsed results to create the result of this function call. |
207 | template <typename T> |
208 | ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, |
209 | RangeSelectorOp<T, T> Op, |
210 | ParseState State) { |
211 | auto P = parseChar(c: '(', State); |
212 | if (!P) |
213 | return P.takeError(); |
214 | |
215 | auto Left = ParseElement(P->State); |
216 | if (!Left) |
217 | return Left.takeError(); |
218 | |
219 | P = parseChar(',', Left->State); |
220 | if (!P) |
221 | return P.takeError(); |
222 | |
223 | auto Right = ParseElement(P->State); |
224 | if (!Right) |
225 | return Right.takeError(); |
226 | |
227 | P = parseChar(')', Right->State); |
228 | if (!P) |
229 | return P.takeError(); |
230 | |
231 | return makeParseProgress(P->State, |
232 | Op(std::move(Left->Value), std::move(Right->Value))); |
233 | } |
234 | |
235 | // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or |
236 | // Id operator). Returns StencilType representing the operator on success and |
237 | // error if it fails to parse input for an operator. |
238 | static ExpectedProgress<RangeSelector> |
239 | parseRangeSelectorImpl(ParseState State) { |
240 | auto Id = parseId(State); |
241 | if (!Id) |
242 | return Id.takeError(); |
243 | |
244 | std::string OpName = std::move(Id->Value); |
245 | if (auto Op = findOptional(Map: getUnaryStringSelectors(), Key: OpName)) |
246 | return parseSingle(ParseElement: parseStringId, Op: *Op, State: Id->State); |
247 | |
248 | if (auto Op = findOptional(Map: getUnaryRangeSelectors(), Key: OpName)) |
249 | return parseSingle(ParseElement: parseRangeSelectorImpl, Op: *Op, State: Id->State); |
250 | |
251 | if (auto Op = findOptional(Map: getBinaryStringSelectors(), Key: OpName)) |
252 | return parsePair(ParseElement: parseStringId, Op: *Op, State: Id->State); |
253 | |
254 | if (auto Op = findOptional(Map: getBinaryRangeSelectors(), Key: OpName)) |
255 | return parsePair(ParseElement: parseRangeSelectorImpl, Op: *Op, State: Id->State); |
256 | |
257 | return makeParseError(S: State, ErrorMsg: "unknown selector name: " + OpName); |
258 | } |
259 | |
260 | Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { |
261 | ParseState State = {.Input: Input, .OriginalInput: Input}; |
262 | ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); |
263 | if (!Result) |
264 | return Result.takeError(); |
265 | State = Result->State; |
266 | // Discard any potentially trailing whitespace. |
267 | State.Input = consumeWhitespace(S: State.Input); |
268 | if (State.Input.empty()) |
269 | return Result->Value; |
270 | return makeParseError(S: State, ErrorMsg: "unexpected input after selector" ); |
271 | } |
272 | |