1 | extern crate proc_macro; |
2 | |
3 | use quote::quote; |
4 | use syn::{braced, Token}; |
5 | |
6 | use std::collections::HashSet; |
7 | use syn::ext::IdentExt; |
8 | use syn::parse::{Parse, ParseStream, Result}; |
9 | |
10 | /// Implements the `match_token!()` macro for use by the HTML tree builder |
11 | /// in `src/tree_builder/rules.rs`. |
12 | /// |
13 | /// ## Example |
14 | /// |
15 | /// ```rust,ignore |
16 | /// match_token!(token { |
17 | /// CommentToken(text) => 1, |
18 | /// tag @ <base> <link> <meta> => 2, |
19 | /// </head> => 3, |
20 | /// </body> </html> </br> => else, |
21 | /// tag @ </_> => 4, |
22 | /// token => 5, |
23 | /// }) |
24 | /// ``` |
25 | /// |
26 | /// ## Syntax |
27 | /// Because of the simplistic parser, the macro invocation must |
28 | /// start with exactly `match_token!(token {` (with whitespace as specified) |
29 | /// and end with exactly `})`. |
30 | /// The left-hand side of each match arm is an optional `name @` binding, followed by |
31 | /// - an ordinary Rust pattern that starts with an identifier or an underscore, or |
32 | /// - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>" |
33 | /// to match an open or close tag respectively, or |
34 | /// - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags |
35 | /// respectively. |
36 | /// |
37 | /// The right-hand side is either an expression or the keyword `else`. |
38 | /// Note that this syntax does not support guards or pattern alternation like |
39 | /// `Foo | Bar`. This is not a fundamental limitation; it's done for implementation |
40 | /// simplicity. |
41 | /// ## Semantics |
42 | /// Ordinary Rust patterns match as usual. If present, the `name @` binding has |
43 | /// the usual meaning. |
44 | /// A sequence of named tags matches any of those tags. A single sequence can |
45 | /// contain both open and close tags. If present, the `name @` binding binds (by |
46 | /// move) the `Tag` struct, not the outer `Token`. That is, a match arm like |
47 | /// ```rust,ignore |
48 | /// tag @ <html> <head> => ... |
49 | /// ``` |
50 | /// expands to something like |
51 | /// ```rust,ignore |
52 | /// TagToken(tag @ Tag { name: local_name!("html" ), kind: StartTag }) |
53 | /// | TagToken(tag @ Tag { name: local_name!("head" ), kind: StartTag }) => ... |
54 | /// ``` |
55 | /// A wildcard tag matches any tag of the appropriate kind, *unless* it was |
56 | /// previously matched with an `else` right-hand side (more on this below). |
57 | /// The expansion of this macro reorders code somewhat, to satisfy various |
58 | /// restrictions arising from moves. However it provides the semantics of in-order |
59 | /// matching, by enforcing the following restrictions on its input: |
60 | /// - The last pattern must be a variable or the wildcard "_". In other words |
61 | /// it must match everything. |
62 | /// - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear |
63 | /// after wildcard tag patterns. |
64 | /// - No tag name may appear more than once. |
65 | /// - A wildcard tag pattern may not occur in the same arm as any other tag. |
66 | /// "<_> <html> => ..." and "<_> </_> => ..." are both forbidden. |
67 | /// - The right-hand side "else" may only appear with specific-tag patterns. |
68 | /// It means that these specific tags should be handled by the last, |
69 | /// catch-all case arm, rather than by any wildcard tag arm. This situation |
70 | /// is common in the HTML5 syntax. |
71 | #[proc_macro ] |
72 | pub fn match_token(input: proc_macro::TokenStream) -> proc_macro::TokenStream { |
73 | let input: TokenStream = proc_macro2::TokenStream::from(input); |
74 | |
75 | let match_token: MatchToken = syn::parse2::<MatchToken>(input).expect(msg:"Parsing match_token! input failed" ); |
76 | let output: TokenStream = expand_match_token_macro(match_token); |
77 | |
78 | proc_macro::TokenStream::from(output) |
79 | } |
80 | |
81 | struct MatchToken { |
82 | ident: syn::Ident, |
83 | arms: Vec<MatchTokenArm>, |
84 | } |
85 | |
86 | struct MatchTokenArm { |
87 | binding: Option<syn::Ident>, |
88 | lhs: Lhs, |
89 | rhs: Rhs, |
90 | } |
91 | |
92 | enum Lhs { |
93 | Tags(Vec<Tag>), |
94 | Pattern(syn::Pat), |
95 | } |
96 | |
97 | enum Rhs { |
98 | Expression(syn::Expr), |
99 | Else, |
100 | } |
101 | |
/// Whether a tag pattern was written as an opening or a closing tag.
#[derive(PartialEq, Eq, Hash, Clone)]
enum TagKind {
    /// Written as `<name>`.
    StartTag,
    /// Written as `</name>`.
    EndTag,
}
107 | |
108 | // Option is None if wildcard |
109 | #[derive (PartialEq, Eq, Hash, Clone)] |
110 | struct Tag { |
111 | kind: TagKind, |
112 | name: Option<syn::Ident>, |
113 | } |
114 | |
115 | impl Parse for Tag { |
116 | fn parse(input: ParseStream) -> Result<Self> { |
117 | input.parse::<Token![<]>()?; |
118 | let closing: Option<Token![/]> = input.parse()?; |
119 | let name: Option = match input.call(function:syn::Ident::parse_any)? { |
120 | ref wildcard: &Ident if wildcard == "_" => None, |
121 | other: Ident => Some(other), |
122 | }; |
123 | input.parse::<Token![>]>()?; |
124 | Ok(Tag { |
125 | kind: if closing.is_some() { |
126 | TagKind::EndTag |
127 | } else { |
128 | TagKind::StartTag |
129 | }, |
130 | name, |
131 | }) |
132 | } |
133 | } |
134 | |
135 | impl Parse for Lhs { |
136 | fn parse(input: ParseStream) -> Result<Self> { |
137 | if input.peek(Token![<]) { |
138 | let mut tags: Vec = Vec::new(); |
139 | while !input.peek(Token![=>]) { |
140 | tags.push(input.parse()?); |
141 | } |
142 | Ok(Lhs::Tags(tags)) |
143 | } else { |
144 | let p: Pat = input.call(function:syn::Pat::parse_single)?; |
145 | Ok(Lhs::Pattern(p)) |
146 | } |
147 | } |
148 | } |
149 | |
150 | impl Parse for MatchTokenArm { |
151 | fn parse(input: ParseStream) -> Result<Self> { |
152 | let binding = if input.peek2(Token![@]) { |
153 | let binding = input.parse::<syn::Ident>()?; |
154 | input.parse::<Token![@]>()?; |
155 | Some(binding) |
156 | } else { |
157 | None |
158 | }; |
159 | let lhs = input.parse::<Lhs>()?; |
160 | input.parse::<Token![=>]>()?; |
161 | let rhs = if input.peek(syn::token::Brace) { |
162 | let block = input.parse::<syn::Block>().unwrap(); |
163 | let block = syn::ExprBlock { |
164 | attrs: vec![], |
165 | label: None, |
166 | block, |
167 | }; |
168 | input.parse::<Option<Token![,]>>()?; |
169 | Rhs::Expression(syn::Expr::Block(block)) |
170 | } else if input.peek(Token![else]) { |
171 | input.parse::<Token![else]>()?; |
172 | input.parse::<Token![,]>()?; |
173 | Rhs::Else |
174 | } else { |
175 | let expr = input.parse::<syn::Expr>().unwrap(); |
176 | input.parse::<Option<Token![,]>>()?; |
177 | Rhs::Expression(expr) |
178 | }; |
179 | |
180 | Ok(MatchTokenArm { binding, lhs, rhs }) |
181 | } |
182 | } |
183 | |
184 | impl Parse for MatchToken { |
185 | fn parse(input: ParseStream) -> Result<Self> { |
186 | let ident: Ident = input.parse::<syn::Ident>()?; |
187 | let content: ParseBuffer<'_>; |
188 | braced!(content in input); |
189 | let mut arms: Vec = vec![]; |
190 | while !content.is_empty() { |
191 | arms.push(content.parse()?); |
192 | } |
193 | Ok(MatchToken { ident, arms }) |
194 | } |
195 | } |
196 | |
197 | fn expand_match_token_macro(match_token: MatchToken) -> proc_macro2::TokenStream { |
198 | let mut arms = match_token.arms; |
199 | let to_be_matched = match_token.ident; |
200 | // Handle the last arm specially at the end. |
201 | let last_arm = arms.pop().unwrap(); |
202 | |
203 | // Tags we've seen, used for detecting duplicates. |
204 | let mut seen_tags: HashSet<Tag> = HashSet::new(); |
205 | |
206 | // Case arms for wildcard matching. We collect these and |
207 | // emit them later. |
208 | let mut wildcards_patterns: Vec<proc_macro2::TokenStream> = Vec::new(); |
209 | let mut wildcards_expressions: Vec<syn::Expr> = Vec::new(); |
210 | |
211 | // Tags excluded (by an 'else' RHS) from wildcard matching. |
212 | let mut wild_excluded_patterns: Vec<proc_macro2::TokenStream> = Vec::new(); |
213 | |
214 | let mut arms_code = Vec::new(); |
215 | |
216 | for MatchTokenArm { binding, lhs, rhs } in arms { |
217 | // Build Rust syntax for the `name @` binding, if any. |
218 | let binding = match binding { |
219 | Some(ident) => quote!(#ident @), |
220 | None => quote!(), |
221 | }; |
222 | |
223 | match (lhs, rhs) { |
224 | (Lhs::Pattern(_), Rhs::Else) => { |
225 | panic!("'else' may not appear with an ordinary pattern" ) |
226 | }, |
227 | |
228 | // ordinary pattern => expression |
229 | (Lhs::Pattern(pat), Rhs::Expression(expr)) => { |
230 | if !wildcards_patterns.is_empty() { |
231 | panic!("ordinary patterns may not appear after wildcard tags" ); |
232 | } |
233 | arms_code.push(quote!(#binding #pat => #expr,)) |
234 | }, |
235 | |
236 | // <tag> <tag> ... => else |
237 | (Lhs::Tags(tags), Rhs::Else) => { |
238 | for tag in tags { |
239 | if !seen_tags.insert(tag.clone()) { |
240 | panic!("duplicate tag" ); |
241 | } |
242 | if tag.name.is_none() { |
243 | panic!("'else' may not appear with a wildcard tag" ); |
244 | } |
245 | wild_excluded_patterns |
246 | .push(make_tag_pattern(&proc_macro2::TokenStream::new(), tag)); |
247 | } |
248 | }, |
249 | |
250 | // <_> => expression |
251 | // <tag> <tag> ... => expression |
252 | (Lhs::Tags(tags), Rhs::Expression(expr)) => { |
253 | // Is this arm a tag wildcard? |
254 | // `None` if we haven't processed the first tag yet. |
255 | let mut wildcard = None; |
256 | for tag in tags { |
257 | if !seen_tags.insert(tag.clone()) { |
258 | panic!("duplicate tag" ); |
259 | } |
260 | |
261 | match tag.name { |
262 | // <tag> |
263 | Some(_) => { |
264 | if !wildcards_patterns.is_empty() { |
265 | panic!("specific tags may not appear after wildcard tags" ); |
266 | } |
267 | |
268 | if wildcard == Some(true) { |
269 | panic!("wildcard tags must appear alone" ); |
270 | } |
271 | |
272 | if wildcard.is_some() { |
273 | // Push the delimiter `|` if it's not the first tag. |
274 | arms_code.push(quote!( | )) |
275 | } |
276 | arms_code.push(make_tag_pattern(&binding, tag)); |
277 | |
278 | wildcard = Some(false); |
279 | }, |
280 | |
281 | // <_> |
282 | None => { |
283 | if wildcard.is_some() { |
284 | panic!("wildcard tags must appear alone" ); |
285 | } |
286 | wildcard = Some(true); |
287 | wildcards_patterns.push(make_tag_pattern(&binding, tag)); |
288 | wildcards_expressions.push(expr.clone()); |
289 | }, |
290 | } |
291 | } |
292 | |
293 | match wildcard { |
294 | None => panic!("[internal macro error] tag arm with no tags" ), |
295 | Some(false) => arms_code.push(quote!( => #expr,)), |
296 | Some(true) => {}, // codegen for wildcards is deferred |
297 | } |
298 | }, |
299 | } |
300 | } |
301 | |
302 | // Time to process the last, catch-all arm. We will generate something like |
303 | // |
304 | // last_arm_token => { |
305 | // let enable_wildcards = match last_arm_token { |
306 | // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false, |
307 | // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false, |
308 | // // ... |
309 | // _ => true, |
310 | // }; |
311 | // |
312 | // match (enable_wildcards, last_arm_token) { |
313 | // (true, TagToken(name @ Tag { kind: StartTag, .. })) |
314 | // => ..., // wildcard action for start tags |
315 | // |
316 | // (true, TagToken(name @ Tag { kind: EndTag, .. })) |
317 | // => ..., // wildcard action for end tags |
318 | // |
319 | // (_, token) => ... // using the pattern from that last arm |
320 | // } |
321 | // } |
322 | |
323 | let MatchTokenArm { binding, lhs, rhs } = last_arm; |
324 | |
325 | let (last_pat, last_expr) = match (binding, lhs, rhs) { |
326 | (Some(_), _, _) => panic!("the last arm cannot have an @-binding" ), |
327 | (None, Lhs::Tags(_), _) => panic!("the last arm cannot have tag patterns" ), |
328 | (None, _, Rhs::Else) => panic!("the last arm cannot use 'else'" ), |
329 | (None, Lhs::Pattern(p), Rhs::Expression(e)) => (p, e), |
330 | }; |
331 | |
332 | quote! { |
333 | match #to_be_matched { |
334 | #( |
335 | #arms_code |
336 | )* |
337 | last_arm_token => { |
338 | let enable_wildcards = match last_arm_token { |
339 | #( |
340 | #wild_excluded_patterns => false, |
341 | )* |
342 | _ => true, |
343 | }; |
344 | match (enable_wildcards, last_arm_token) { |
345 | #( |
346 | (true, #wildcards_patterns) => #wildcards_expressions, |
347 | )* |
348 | (_, #last_pat) => #last_expr, |
349 | } |
350 | } |
351 | } |
352 | } |
353 | } |
354 | |
355 | fn make_tag_pattern(binding: &proc_macro2::TokenStream, tag: Tag) -> proc_macro2::TokenStream { |
356 | let kind: TokenStream = match tag.kind { |
357 | TagKind::StartTag => quote!(crate::tokenizer::StartTag), |
358 | TagKind::EndTag => quote!(crate::tokenizer::EndTag), |
359 | }; |
360 | let name_field: TokenStream = if let Some(name: Ident) = tag.name { |
361 | let name: String = name.to_string(); |
362 | quote!(name: local_name!(#name),) |
363 | } else { |
364 | quote!() |
365 | }; |
366 | quote! { |
367 | crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. }) |
368 | } |
369 | } |
370 | |