extern crate proc_macro;

use std::collections::HashSet;

use quote::quote;
use syn::ext::IdentExt;
use syn::parse::{Parse, ParseStream, Result};
use syn::{braced, Token};

| 10 | /// Implements the `match_token!()` macro for use by the HTML tree builder |
| 11 | /// in `src/tree_builder/rules.rs`. |
| 12 | /// |
| 13 | /// ## Example |
| 14 | /// |
| 15 | /// ```rust,ignore |
| 16 | /// match_token!(token { |
| 17 | /// CommentToken(text) => 1, |
| 18 | /// tag @ <base> <link> <meta> => 2, |
| 19 | /// </head> => 3, |
| 20 | /// </body> </html> </br> => else, |
| 21 | /// tag @ </_> => 4, |
| 22 | /// token => 5, |
| 23 | /// }) |
| 24 | /// ``` |
| 25 | /// |
| 26 | /// ## Syntax |
| 27 | /// Because of the simplistic parser, the macro invocation must |
| 28 | /// start with exactly `match_token!(token {` (with whitespace as specified) |
| 29 | /// and end with exactly `})`. |
| 30 | /// The left-hand side of each match arm is an optional `name @` binding, followed by |
| 31 | /// - an ordinary Rust pattern that starts with an identifier or an underscore, or |
| 32 | /// - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>" |
| 33 | /// to match an open or close tag respectively, or |
| 34 | /// - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags |
| 35 | /// respectively. |
| 36 | /// |
| 37 | /// The right-hand side is either an expression or the keyword `else`. |
| 38 | /// Note that this syntax does not support guards or pattern alternation like |
| 39 | /// `Foo | Bar`. This is not a fundamental limitation; it's done for implementation |
| 40 | /// simplicity. |
| 41 | /// ## Semantics |
| 42 | /// Ordinary Rust patterns match as usual. If present, the `name @` binding has |
| 43 | /// the usual meaning. |
| 44 | /// A sequence of named tags matches any of those tags. A single sequence can |
| 45 | /// contain both open and close tags. If present, the `name @` binding binds (by |
| 46 | /// move) the `Tag` struct, not the outer `Token`. That is, a match arm like |
| 47 | /// ```rust,ignore |
| 48 | /// tag @ <html> <head> => ... |
| 49 | /// ``` |
| 50 | /// expands to something like |
| 51 | /// ```rust,ignore |
| 52 | /// TagToken(tag @ Tag { name: local_name!("html" ), kind: StartTag }) |
| 53 | /// | TagToken(tag @ Tag { name: local_name!("head" ), kind: StartTag }) => ... |
| 54 | /// ``` |
| 55 | /// A wildcard tag matches any tag of the appropriate kind, *unless* it was |
| 56 | /// previously matched with an `else` right-hand side (more on this below). |
| 57 | /// The expansion of this macro reorders code somewhat, to satisfy various |
| 58 | /// restrictions arising from moves. However it provides the semantics of in-order |
| 59 | /// matching, by enforcing the following restrictions on its input: |
| 60 | /// - The last pattern must be a variable or the wildcard "_". In other words |
| 61 | /// it must match everything. |
| 62 | /// - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear |
| 63 | /// after wildcard tag patterns. |
| 64 | /// - No tag name may appear more than once. |
| 65 | /// - A wildcard tag pattern may not occur in the same arm as any other tag. |
| 66 | /// "<_> <html> => ..." and "<_> </_> => ..." are both forbidden. |
| 67 | /// - The right-hand side "else" may only appear with specific-tag patterns. |
| 68 | /// It means that these specific tags should be handled by the last, |
| 69 | /// catch-all case arm, rather than by any wildcard tag arm. This situation |
| 70 | /// is common in the HTML5 syntax. |
| 71 | #[proc_macro ] |
| 72 | pub fn match_token(input: proc_macro::TokenStream) -> proc_macro::TokenStream { |
| 73 | let input: TokenStream = proc_macro2::TokenStream::from(input); |
| 74 | |
| 75 | let match_token: MatchToken = syn::parse2::<MatchToken>(input).expect(msg:"Parsing match_token! input failed" ); |
| 76 | let output: TokenStream = expand_match_token_macro(match_token); |
| 77 | |
| 78 | proc_macro::TokenStream::from(output) |
| 79 | } |
| 80 | |
| 81 | struct MatchToken { |
| 82 | ident: syn::Ident, |
| 83 | arms: Vec<MatchTokenArm>, |
| 84 | } |
| 85 | |
| 86 | struct MatchTokenArm { |
| 87 | binding: Option<syn::Ident>, |
| 88 | lhs: Lhs, |
| 89 | rhs: Rhs, |
| 90 | } |
| 91 | |
| 92 | enum Lhs { |
| 93 | Tags(Vec<Tag>), |
| 94 | Pattern(syn::Pat), |
| 95 | } |
| 96 | |
| 97 | enum Rhs { |
| 98 | Expression(syn::Expr), |
| 99 | Else, |
| 100 | } |
| 101 | |
/// Whether a tag pattern matches an opening (`<x>`) or closing (`</x>`) tag.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
enum TagKind {
    /// `<name>`
    StartTag,
    /// `</name>`
    EndTag,
}
| 107 | |
| 108 | // Option is None if wildcard |
| 109 | #[derive (PartialEq, Eq, Hash, Clone)] |
| 110 | struct Tag { |
| 111 | kind: TagKind, |
| 112 | name: Option<syn::Ident>, |
| 113 | } |
| 114 | |
| 115 | impl Parse for Tag { |
| 116 | fn parse(input: ParseStream) -> Result<Self> { |
| 117 | input.parse::<Token![<]>()?; |
| 118 | let closing: Option<Token![/]> = input.parse()?; |
| 119 | let name: Option = match input.call(function:syn::Ident::parse_any)? { |
| 120 | ref wildcard: &Ident if wildcard == "_" => None, |
| 121 | other: Ident => Some(other), |
| 122 | }; |
| 123 | input.parse::<Token![>]>()?; |
| 124 | Ok(Tag { |
| 125 | kind: if closing.is_some() { |
| 126 | TagKind::EndTag |
| 127 | } else { |
| 128 | TagKind::StartTag |
| 129 | }, |
| 130 | name, |
| 131 | }) |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | impl Parse for Lhs { |
| 136 | fn parse(input: ParseStream) -> Result<Self> { |
| 137 | if input.peek(Token![<]) { |
| 138 | let mut tags: Vec = Vec::new(); |
| 139 | while !input.peek(Token![=>]) { |
| 140 | tags.push(input.parse()?); |
| 141 | } |
| 142 | Ok(Lhs::Tags(tags)) |
| 143 | } else { |
| 144 | let p: Pat = input.call(function:syn::Pat::parse_single)?; |
| 145 | Ok(Lhs::Pattern(p)) |
| 146 | } |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | impl Parse for MatchTokenArm { |
| 151 | fn parse(input: ParseStream) -> Result<Self> { |
| 152 | let binding = if input.peek2(Token![@]) { |
| 153 | let binding = input.parse::<syn::Ident>()?; |
| 154 | input.parse::<Token![@]>()?; |
| 155 | Some(binding) |
| 156 | } else { |
| 157 | None |
| 158 | }; |
| 159 | let lhs = input.parse::<Lhs>()?; |
| 160 | input.parse::<Token![=>]>()?; |
| 161 | let rhs = if input.peek(syn::token::Brace) { |
| 162 | let block = input.parse::<syn::Block>().unwrap(); |
| 163 | let block = syn::ExprBlock { |
| 164 | attrs: vec![], |
| 165 | label: None, |
| 166 | block, |
| 167 | }; |
| 168 | input.parse::<Option<Token![,]>>()?; |
| 169 | Rhs::Expression(syn::Expr::Block(block)) |
| 170 | } else if input.peek(Token![else]) { |
| 171 | input.parse::<Token![else]>()?; |
| 172 | input.parse::<Token![,]>()?; |
| 173 | Rhs::Else |
| 174 | } else { |
| 175 | let expr = input.parse::<syn::Expr>().unwrap(); |
| 176 | input.parse::<Option<Token![,]>>()?; |
| 177 | Rhs::Expression(expr) |
| 178 | }; |
| 179 | |
| 180 | Ok(MatchTokenArm { binding, lhs, rhs }) |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | impl Parse for MatchToken { |
| 185 | fn parse(input: ParseStream) -> Result<Self> { |
| 186 | let ident: Ident = input.parse::<syn::Ident>()?; |
| 187 | let content: ParseBuffer<'_>; |
| 188 | braced!(content in input); |
| 189 | let mut arms: Vec = vec![]; |
| 190 | while !content.is_empty() { |
| 191 | arms.push(content.parse()?); |
| 192 | } |
| 193 | Ok(MatchToken { ident, arms }) |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | fn expand_match_token_macro(match_token: MatchToken) -> proc_macro2::TokenStream { |
| 198 | let mut arms = match_token.arms; |
| 199 | let to_be_matched = match_token.ident; |
| 200 | // Handle the last arm specially at the end. |
| 201 | let last_arm = arms.pop().unwrap(); |
| 202 | |
| 203 | // Tags we've seen, used for detecting duplicates. |
| 204 | let mut seen_tags: HashSet<Tag> = HashSet::new(); |
| 205 | |
| 206 | // Case arms for wildcard matching. We collect these and |
| 207 | // emit them later. |
| 208 | let mut wildcards_patterns: Vec<proc_macro2::TokenStream> = Vec::new(); |
| 209 | let mut wildcards_expressions: Vec<syn::Expr> = Vec::new(); |
| 210 | |
| 211 | // Tags excluded (by an 'else' RHS) from wildcard matching. |
| 212 | let mut wild_excluded_patterns: Vec<proc_macro2::TokenStream> = Vec::new(); |
| 213 | |
| 214 | let mut arms_code = Vec::new(); |
| 215 | |
| 216 | for MatchTokenArm { binding, lhs, rhs } in arms { |
| 217 | // Build Rust syntax for the `name @` binding, if any. |
| 218 | let binding = match binding { |
| 219 | Some(ident) => quote!(#ident @), |
| 220 | None => quote!(), |
| 221 | }; |
| 222 | |
| 223 | match (lhs, rhs) { |
| 224 | (Lhs::Pattern(_), Rhs::Else) => { |
| 225 | panic!("'else' may not appear with an ordinary pattern" ) |
| 226 | }, |
| 227 | |
| 228 | // ordinary pattern => expression |
| 229 | (Lhs::Pattern(pat), Rhs::Expression(expr)) => { |
| 230 | if !wildcards_patterns.is_empty() { |
| 231 | panic!("ordinary patterns may not appear after wildcard tags" ); |
| 232 | } |
| 233 | arms_code.push(quote!(#binding #pat => #expr,)) |
| 234 | }, |
| 235 | |
| 236 | // <tag> <tag> ... => else |
| 237 | (Lhs::Tags(tags), Rhs::Else) => { |
| 238 | for tag in tags { |
| 239 | if !seen_tags.insert(tag.clone()) { |
| 240 | panic!("duplicate tag" ); |
| 241 | } |
| 242 | if tag.name.is_none() { |
| 243 | panic!("'else' may not appear with a wildcard tag" ); |
| 244 | } |
| 245 | wild_excluded_patterns |
| 246 | .push(make_tag_pattern(&proc_macro2::TokenStream::new(), tag)); |
| 247 | } |
| 248 | }, |
| 249 | |
| 250 | // <_> => expression |
| 251 | // <tag> <tag> ... => expression |
| 252 | (Lhs::Tags(tags), Rhs::Expression(expr)) => { |
| 253 | // Is this arm a tag wildcard? |
| 254 | // `None` if we haven't processed the first tag yet. |
| 255 | let mut wildcard = None; |
| 256 | for tag in tags { |
| 257 | if !seen_tags.insert(tag.clone()) { |
| 258 | panic!("duplicate tag" ); |
| 259 | } |
| 260 | |
| 261 | match tag.name { |
| 262 | // <tag> |
| 263 | Some(_) => { |
| 264 | if !wildcards_patterns.is_empty() { |
| 265 | panic!("specific tags may not appear after wildcard tags" ); |
| 266 | } |
| 267 | |
| 268 | if wildcard == Some(true) { |
| 269 | panic!("wildcard tags must appear alone" ); |
| 270 | } |
| 271 | |
| 272 | if wildcard.is_some() { |
| 273 | // Push the delimiter `|` if it's not the first tag. |
| 274 | arms_code.push(quote!( | )) |
| 275 | } |
| 276 | arms_code.push(make_tag_pattern(&binding, tag)); |
| 277 | |
| 278 | wildcard = Some(false); |
| 279 | }, |
| 280 | |
| 281 | // <_> |
| 282 | None => { |
| 283 | if wildcard.is_some() { |
| 284 | panic!("wildcard tags must appear alone" ); |
| 285 | } |
| 286 | wildcard = Some(true); |
| 287 | wildcards_patterns.push(make_tag_pattern(&binding, tag)); |
| 288 | wildcards_expressions.push(expr.clone()); |
| 289 | }, |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | match wildcard { |
| 294 | None => panic!("[internal macro error] tag arm with no tags" ), |
| 295 | Some(false) => arms_code.push(quote!( => #expr,)), |
| 296 | Some(true) => {}, // codegen for wildcards is deferred |
| 297 | } |
| 298 | }, |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | // Time to process the last, catch-all arm. We will generate something like |
| 303 | // |
| 304 | // last_arm_token => { |
| 305 | // let enable_wildcards = match last_arm_token { |
| 306 | // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false, |
| 307 | // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false, |
| 308 | // // ... |
| 309 | // _ => true, |
| 310 | // }; |
| 311 | // |
| 312 | // match (enable_wildcards, last_arm_token) { |
| 313 | // (true, TagToken(name @ Tag { kind: StartTag, .. })) |
| 314 | // => ..., // wildcard action for start tags |
| 315 | // |
| 316 | // (true, TagToken(name @ Tag { kind: EndTag, .. })) |
| 317 | // => ..., // wildcard action for end tags |
| 318 | // |
| 319 | // (_, token) => ... // using the pattern from that last arm |
| 320 | // } |
| 321 | // } |
| 322 | |
| 323 | let MatchTokenArm { binding, lhs, rhs } = last_arm; |
| 324 | |
| 325 | let (last_pat, last_expr) = match (binding, lhs, rhs) { |
| 326 | (Some(_), _, _) => panic!("the last arm cannot have an @-binding" ), |
| 327 | (None, Lhs::Tags(_), _) => panic!("the last arm cannot have tag patterns" ), |
| 328 | (None, _, Rhs::Else) => panic!("the last arm cannot use 'else'" ), |
| 329 | (None, Lhs::Pattern(p), Rhs::Expression(e)) => (p, e), |
| 330 | }; |
| 331 | |
| 332 | quote! { |
| 333 | match #to_be_matched { |
| 334 | #( |
| 335 | #arms_code |
| 336 | )* |
| 337 | last_arm_token => { |
| 338 | let enable_wildcards = match last_arm_token { |
| 339 | #( |
| 340 | #wild_excluded_patterns => false, |
| 341 | )* |
| 342 | _ => true, |
| 343 | }; |
| 344 | match (enable_wildcards, last_arm_token) { |
| 345 | #( |
| 346 | (true, #wildcards_patterns) => #wildcards_expressions, |
| 347 | )* |
| 348 | (_, #last_pat) => #last_expr, |
| 349 | } |
| 350 | } |
| 351 | } |
| 352 | } |
| 353 | } |
| 354 | |
| 355 | fn make_tag_pattern(binding: &proc_macro2::TokenStream, tag: Tag) -> proc_macro2::TokenStream { |
| 356 | let kind: TokenStream = match tag.kind { |
| 357 | TagKind::StartTag => quote!(crate::tokenizer::StartTag), |
| 358 | TagKind::EndTag => quote!(crate::tokenizer::EndTag), |
| 359 | }; |
| 360 | let name_field: TokenStream = if let Some(name: Ident) = tag.name { |
| 361 | let name: String = name.to_string(); |
| 362 | quote!(name: local_name!(#name),) |
| 363 | } else { |
| 364 | quote!() |
| 365 | }; |
| 366 | quote! { |
| 367 | crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. }) |
| 368 | } |
| 369 | } |
| 370 | |