1 | //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/Transformer/Parsing.h" |
10 | #include "clang/AST/Expr.h" |
11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
12 | #include "clang/Basic/CharInfo.h" |
13 | #include "clang/Basic/SourceLocation.h" |
14 | #include "clang/Lex/Lexer.h" |
15 | #include "clang/Tooling/Transformer/RangeSelector.h" |
16 | #include "clang/Tooling/Transformer/SourceCode.h" |
17 | #include "llvm/ADT/StringMap.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/Support/Errc.h" |
20 | #include "llvm/Support/Error.h" |
21 | #include <optional> |
22 | #include <string> |
23 | #include <utility> |
24 | #include <vector> |
25 | |
26 | using namespace clang; |
27 | using namespace transformer; |
28 | |
29 | // FIXME: This implementation is entirely separate from that of the AST |
30 | // matchers. Given the similarity of the languages and uses of the two parsers, |
31 | // the two should share a common parsing infrastructure, as should other |
32 | // Transformer types. We intend to unify this implementation soon to share as |
33 | // much as possible with the AST Matchers parsing. |
34 | |
35 | namespace { |
36 | using llvm::Expected; |
37 | |
38 | template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); |
39 | |
40 | struct ParseState { |
41 | // The remaining input to be processed. |
42 | StringRef Input; |
43 | // The original input. Not modified during parsing; only for reference in |
44 | // error reporting. |
45 | StringRef OriginalInput; |
46 | }; |
47 | |
48 | // Represents an intermediate result returned by a parsing function. Functions |
49 | // that don't generate values should use `std::nullopt` |
50 | template <typename ResultType> struct ParseProgress { |
51 | ParseState State; |
52 | // Intermediate result generated by the Parser. |
53 | ResultType Value; |
54 | }; |
55 | |
56 | template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; |
57 | template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); |
58 | |
59 | class ParseError : public llvm::ErrorInfo<ParseError> { |
60 | public: |
61 | // Required field for all ErrorInfo derivatives. |
62 | static char ID; |
63 | |
64 | ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) |
65 | : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), |
66 | Excerpt(std::move(InputExcerpt)) {} |
67 | |
68 | void log(llvm::raw_ostream &OS) const override { |
69 | OS << "parse error at position (" << Pos << "): " << ErrorMsg |
70 | << ": " + Excerpt; |
71 | } |
72 | |
73 | std::error_code convertToErrorCode() const override { |
74 | return llvm::inconvertibleErrorCode(); |
75 | } |
76 | |
77 | // Position of the error in the input string. |
78 | size_t Pos; |
79 | std::string ErrorMsg; |
80 | // Excerpt of the input starting at the error position. |
81 | std::string Excerpt; |
82 | }; |
83 | |
84 | char ParseError::ID; |
85 | } // namespace |
86 | |
87 | static const llvm::StringMap<RangeSelectorOp<std::string>> & |
88 | getUnaryStringSelectors() { |
89 | static const llvm::StringMap<RangeSelectorOp<std::string>> M = { |
90 | {"name" , name}, |
91 | {"node" , node}, |
92 | {"statement" , statement}, |
93 | {"statements" , statements}, |
94 | {"member" , member}, |
95 | {"callArgs" , callArgs}, |
96 | {"elseBranch" , elseBranch}, |
97 | {"initListElements" , initListElements}}; |
98 | return M; |
99 | } |
100 | |
101 | static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & |
102 | getUnaryRangeSelectors() { |
103 | static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { |
104 | {"before" , before}, {"after" , after}, {"expansion" , expansion}}; |
105 | return M; |
106 | } |
107 | |
108 | static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & |
109 | getBinaryStringSelectors() { |
110 | static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { |
111 | {"encloseNodes" , encloseNodes}}; |
112 | return M; |
113 | } |
114 | |
115 | static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & |
116 | getBinaryRangeSelectors() { |
117 | static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> |
118 | M = {{"enclose" , enclose}, {"between" , between}}; |
119 | return M; |
120 | } |
121 | |
122 | template <typename Element> |
123 | std::optional<Element> findOptional(const llvm::StringMap<Element> &Map, |
124 | llvm::StringRef Key) { |
125 | auto it = Map.find(Key); |
126 | if (it == Map.end()) |
127 | return std::nullopt; |
128 | return it->second; |
129 | } |
130 | |
131 | template <typename ResultType> |
132 | ParseProgress<ResultType> makeParseProgress(ParseState State, |
133 | ResultType Result) { |
134 | return ParseProgress<ResultType>{State, std::move(Result)}; |
135 | } |
136 | |
137 | static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { |
138 | size_t Pos = S.OriginalInput.size() - S.Input.size(); |
139 | return llvm::make_error<ParseError>(Args&: Pos, Args: std::move(ErrorMsg), |
140 | Args: S.OriginalInput.substr(Start: Pos, N: 20).str()); |
141 | } |
142 | |
143 | // Returns a new ParseState that advances \c S by \c N characters. |
144 | static ParseState advance(ParseState S, size_t N) { |
145 | S.Input = S.Input.drop_front(N); |
146 | return S; |
147 | } |
148 | |
149 | static StringRef consumeWhitespace(StringRef S) { |
150 | return S.drop_while(F: [](char c) { return isASCII(c) && isWhitespace(c); }); |
151 | } |
152 | |
153 | // Parses a single expected character \c c from \c State, skipping preceding |
154 | // whitespace. Error if the expected character isn't found. |
155 | static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) { |
156 | State.Input = consumeWhitespace(S: State.Input); |
157 | if (State.Input.empty() || State.Input.front() != c) |
158 | return makeParseError(S: State, |
159 | ErrorMsg: ("expected char not found: " + llvm::Twine(c)).str()); |
160 | return makeParseProgress(State: advance(S: State, N: 1), Result: std::nullopt); |
161 | } |
162 | |
163 | // Parses an identitifer "token" -- handles preceding whitespace. |
164 | static ExpectedProgress<std::string> parseId(ParseState State) { |
165 | State.Input = consumeWhitespace(S: State.Input); |
166 | auto Id = State.Input.take_while( |
167 | F: [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); }); |
168 | if (Id.empty()) |
169 | return makeParseError(S: State, ErrorMsg: "failed to parse name" ); |
170 | return makeParseProgress(State: advance(S: State, N: Id.size()), Result: Id.str()); |
171 | } |
172 | |
173 | // For consistency with the AST matcher parser and C++ code, node ids are |
174 | // written as strings. However, we do not support escaping in the string. |
175 | static ExpectedProgress<std::string> parseStringId(ParseState State) { |
176 | State.Input = consumeWhitespace(S: State.Input); |
177 | if (State.Input.empty()) |
178 | return makeParseError(S: State, ErrorMsg: "unexpected end of input" ); |
179 | if (!State.Input.consume_front(Prefix: "\"" )) |
180 | return makeParseError( |
181 | S: State, |
182 | ErrorMsg: "expecting string, but encountered other character or end of input" ); |
183 | |
184 | StringRef Id = State.Input.take_until(F: [](char c) { return c == '"'; }); |
185 | if (State.Input.size() == Id.size()) |
186 | return makeParseError(S: State, ErrorMsg: "unterminated string" ); |
187 | // Advance past the trailing quote as well. |
188 | return makeParseProgress(State: advance(S: State, N: Id.size() + 1), Result: Id.str()); |
189 | } |
190 | |
191 | // Parses a single element surrounded by parens. `Op` is applied to the parsed |
192 | // result to create the result of this function call. |
193 | template <typename T> |
194 | ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, |
195 | RangeSelectorOp<T> Op, |
196 | ParseState State) { |
197 | auto P = parseChar(c: '(', State); |
198 | if (!P) |
199 | return P.takeError(); |
200 | |
201 | auto E = ParseElement(P->State); |
202 | if (!E) |
203 | return E.takeError(); |
204 | |
205 | P = parseChar(')', E->State); |
206 | if (!P) |
207 | return P.takeError(); |
208 | |
209 | return makeParseProgress(P->State, Op(std::move(E->Value))); |
210 | } |
211 | |
212 | // Parses a pair of elements surrounded by parens and separated by comma. `Op` |
213 | // is applied to the parsed results to create the result of this function call. |
214 | template <typename T> |
215 | ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, |
216 | RangeSelectorOp<T, T> Op, |
217 | ParseState State) { |
218 | auto P = parseChar(c: '(', State); |
219 | if (!P) |
220 | return P.takeError(); |
221 | |
222 | auto Left = ParseElement(P->State); |
223 | if (!Left) |
224 | return Left.takeError(); |
225 | |
226 | P = parseChar(',', Left->State); |
227 | if (!P) |
228 | return P.takeError(); |
229 | |
230 | auto Right = ParseElement(P->State); |
231 | if (!Right) |
232 | return Right.takeError(); |
233 | |
234 | P = parseChar(')', Right->State); |
235 | if (!P) |
236 | return P.takeError(); |
237 | |
238 | return makeParseProgress(P->State, |
239 | Op(std::move(Left->Value), std::move(Right->Value))); |
240 | } |
241 | |
242 | // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or |
243 | // Id operator). Returns StencilType representing the operator on success and |
244 | // error if it fails to parse input for an operator. |
245 | static ExpectedProgress<RangeSelector> |
246 | parseRangeSelectorImpl(ParseState State) { |
247 | auto Id = parseId(State); |
248 | if (!Id) |
249 | return Id.takeError(); |
250 | |
251 | std::string OpName = std::move(Id->Value); |
252 | if (auto Op = findOptional(Map: getUnaryStringSelectors(), Key: OpName)) |
253 | return parseSingle(ParseElement: parseStringId, Op: *Op, State: Id->State); |
254 | |
255 | if (auto Op = findOptional(Map: getUnaryRangeSelectors(), Key: OpName)) |
256 | return parseSingle(ParseElement: parseRangeSelectorImpl, Op: *Op, State: Id->State); |
257 | |
258 | if (auto Op = findOptional(Map: getBinaryStringSelectors(), Key: OpName)) |
259 | return parsePair(ParseElement: parseStringId, Op: *Op, State: Id->State); |
260 | |
261 | if (auto Op = findOptional(Map: getBinaryRangeSelectors(), Key: OpName)) |
262 | return parsePair(ParseElement: parseRangeSelectorImpl, Op: *Op, State: Id->State); |
263 | |
264 | return makeParseError(S: State, ErrorMsg: "unknown selector name: " + OpName); |
265 | } |
266 | |
267 | Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { |
268 | ParseState State = {.Input: Input, .OriginalInput: Input}; |
269 | ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); |
270 | if (!Result) |
271 | return Result.takeError(); |
272 | State = Result->State; |
273 | // Discard any potentially trailing whitespace. |
274 | State.Input = consumeWhitespace(S: State.Input); |
275 | if (State.Input.empty()) |
276 | return Result->Value; |
277 | return makeParseError(S: State, ErrorMsg: "unexpected input after selector" ); |
278 | } |
279 | |