1 | // pest. The Elegant Parser |
// Copyright (c) 2018 Dragoș Tiselice
3 | // |
4 | // Licensed under the Apache License, Version 2.0 |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. All files in the project carrying such notice may not be copied, |
8 | // modified, or distributed except according to those terms. |
9 | |
//! Helpers to generate the code for the `Parser` derive.
11 | |
12 | use std::path::PathBuf; |
13 | |
14 | use proc_macro2::TokenStream; |
15 | use quote::{ToTokens, TokenStreamExt}; |
16 | use syn::{self, Ident}; |
17 | |
18 | use pest::unicode::unicode_property_names; |
19 | use pest_meta::ast::*; |
20 | use pest_meta::optimizer::*; |
21 | |
22 | use crate::docs::DocComment; |
23 | use crate::parse_derive::ParsedDerive; |
24 | |
/// Generates the corresponding parser based on the processed macro input. If `include_grammar`
/// is set to true, it'll generate an explicit `include_str!` statement (done in pest_derive, but
/// turned off in the local bootstrap).
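///
/// For a grammar with rules `a` and `if` derived on a struct `MyParser`, the emitted code has
/// roughly this shape (a simplified sketch, not the exact expansion):
///
/// ```ignore
/// pub enum Rule { r#a, r#if }
///
/// impl ::pest::Parser<Rule> for MyParser {
///     fn parse<'i>(
///         rule: Rule,
///         input: &'i str
///     ) -> Result<::pest::iterators::Pairs<'i, Rule>, ::pest::error::Error<Rule>> {
///         // dispatches on `rule` to the generated parsing function for that rule
///     }
/// }
/// ```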
28 | pub fn generate( |
29 | parsed_derive: ParsedDerive, |
30 | paths: Vec<PathBuf>, |
31 | rules: Vec<OptimizedRule>, |
32 | defaults: Vec<&str>, |
33 | doc_comment: &DocComment, |
34 | include_grammar: bool, |
35 | ) -> TokenStream { |
    let uses_eoi = defaults.iter().any(|name| *name == "EOI");
37 | let name = parsed_derive.name; |
38 | let builtins = generate_builtin_rules(); |
39 | let include_fix = if include_grammar { |
40 | generate_include(&name, paths) |
41 | } else { |
42 | quote!() |
43 | }; |
44 | let rule_enum = generate_enum(&rules, doc_comment, uses_eoi, parsed_derive.non_exhaustive); |
45 | let patterns = generate_patterns(&rules, uses_eoi); |
46 | let skip = generate_skip(&rules); |
47 | |
48 | let mut rules: Vec<_> = rules.into_iter().map(generate_rule).collect(); |
49 | rules.extend(builtins.into_iter().filter_map(|(builtin, tokens)| { |
50 | if defaults.contains(&builtin) { |
51 | Some(tokens) |
52 | } else { |
53 | None |
54 | } |
55 | })); |
56 | |
57 | let (impl_generics, ty_generics, where_clause) = parsed_derive.generics.split_for_impl(); |
58 | |
59 | let result = result_type(); |
60 | |
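    // The generated `parse` nests everything in a private `rules` module: `hidden::skip`
    // performs the implicit whitespace/comment skipping, while `visible` holds one function
    // per grammar rule plus the builtin rules the grammar actually uses.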
61 | let parser_impl = quote! { |
62 | #[allow(clippy::all)] |
63 | impl #impl_generics ::pest::Parser<Rule> for #name #ty_generics #where_clause { |
64 | fn parse<'i>( |
65 | rule: Rule, |
66 | input: &'i str |
67 | ) -> #result< |
68 | ::pest::iterators::Pairs<'i, Rule>, |
69 | ::pest::error::Error<Rule> |
70 | > { |
71 | mod rules { |
72 | #![allow(clippy::upper_case_acronyms)] |
73 | pub mod hidden { |
74 | use super::super::Rule; |
75 | #skip |
76 | } |
77 | |
78 | pub mod visible { |
79 | use super::super::Rule; |
80 | #( #rules )* |
81 | } |
82 | |
83 | pub use self::visible::*; |
84 | } |
85 | |
86 | ::pest::state(input, |state| { |
87 | match rule { |
88 | #patterns |
89 | } |
90 | }) |
91 | } |
92 | } |
93 | }; |
94 | |
95 | quote! { |
96 | #include_fix |
97 | #rule_enum |
98 | #parser_impl |
99 | } |
100 | } |
101 | |
102 | // Note: All builtin rules should be validated as pest builtins in meta/src/validator.rs. |
103 | // Some should also be keywords. |
104 | fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { |
105 | let mut builtins = Vec::new(); |
106 | |
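    // `insert_builtin!` (a macro defined in this crate) pushes a `(name, TokenStream)` pair
    // into `builtins`, where the tokens define an inline parsing function for that builtin.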
    insert_builtin!(builtins, ANY, state.skip(1));
    insert_builtin!(
        builtins,
        EOI,
        state.rule(Rule::EOI, |state| state.end_of_input())
    );
    insert_builtin!(builtins, SOI, state.start_of_input());
    insert_builtin!(builtins, PEEK, state.stack_peek());
    insert_builtin!(builtins, PEEK_ALL, state.stack_match_peek());
    insert_builtin!(builtins, POP, state.stack_pop());
    insert_builtin!(builtins, POP_ALL, state.stack_match_pop());
    insert_builtin!(builtins, DROP, state.stack_drop());

    insert_builtin!(builtins, ASCII_DIGIT, state.match_range('0'..'9'));
    insert_builtin!(builtins, ASCII_NONZERO_DIGIT, state.match_range('1'..'9'));
    insert_builtin!(builtins, ASCII_BIN_DIGIT, state.match_range('0'..'1'));
    insert_builtin!(builtins, ASCII_OCT_DIGIT, state.match_range('0'..'7'));
    insert_builtin!(
        builtins,
        ASCII_HEX_DIGIT,
        state
            .match_range('0'..'9')
            .or_else(|state| state.match_range('a'..'f'))
            .or_else(|state| state.match_range('A'..'F'))
    );
    insert_builtin!(builtins, ASCII_ALPHA_LOWER, state.match_range('a'..'z'));
    insert_builtin!(builtins, ASCII_ALPHA_UPPER, state.match_range('A'..'Z'));
    insert_builtin!(
        builtins,
        ASCII_ALPHA,
        state
            .match_range('a'..'z')
            .or_else(|state| state.match_range('A'..'Z'))
    );
    insert_builtin!(
        builtins,
        ASCII_ALPHANUMERIC,
        state
            .match_range('a'..'z')
            .or_else(|state| state.match_range('A'..'Z'))
            .or_else(|state| state.match_range('0'..'9'))
    );
    insert_builtin!(builtins, ASCII, state.match_range('\x00'..'\x7f'));
    insert_builtin!(
        builtins,
        NEWLINE,
        state
            .match_string("\n")
            .or_else(|state| state.match_string("\r\n"))
            .or_else(|state| state.match_string("\r"))
    );
158 | |
159 | let box_ty = box_type(); |
160 | |
161 | for property in unicode_property_names() { |
162 | let property_ident: Ident = syn::parse_str(property).unwrap(); |
163 | // insert manually for #property substitution |
164 | builtins.push((property, quote! { |
165 | #[inline] |
166 | #[allow(dead_code, non_snake_case, unused_variables)] |
167 | fn #property_ident(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
168 | state.match_char_by(::pest::unicode::#property_ident) |
169 | } |
170 | })); |
171 | } |
172 | builtins |
173 | } |
174 | |
/// Generates a Rust `include_str!` for each grammar file, so that Cargo watches the grammar files for changes.
176 | fn generate_include(name: &Ident, paths: Vec<PathBuf>) -> TokenStream { |
    let const_name = format_ident!("_PEST_GRAMMAR_{}", name);
178 | // Need to make this relative to the current directory since the path to the file |
179 | // is derived from the CARGO_MANIFEST_DIR environment variable |
    let current_dir = std::env::current_dir().expect("Unable to get current directory");

    let include_tokens = paths.iter().map(|path| {
        let path = path.to_str().expect("non-Unicode path");

        let relative_path = current_dir
            .join(path)
            .to_str()
            .expect("path contains invalid unicode")
            .to_string();
190 | |
191 | quote! { |
192 | include_str!(#relative_path) |
193 | } |
194 | }); |
195 | |
196 | let len = include_tokens.len(); |
197 | quote! { |
198 | #[allow(non_upper_case_globals)] |
199 | const #const_name: [&'static str; #len] = [ |
200 | #(#include_tokens),* |
201 | ]; |
202 | } |
203 | } |
204 | |
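/// Generates the `Rule` enum: one variant per grammar rule (plus `EOI` when the grammar
/// references it), carrying the grammar-level and per-rule doc comments, together with an
/// `all_rules()` helper listing every grammar rule.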
205 | fn generate_enum( |
206 | rules: &[OptimizedRule], |
207 | doc_comment: &DocComment, |
208 | uses_eoi: bool, |
209 | non_exhaustive: bool, |
210 | ) -> TokenStream { |
211 | let rule_variants = rules.iter().map(|rule| { |
        let rule_name = format_ident!("r#{}", rule.name);
213 | |
214 | match doc_comment.line_docs.get(&rule.name) { |
215 | Some(doc) => quote! { |
216 | #[doc = #doc] |
217 | #rule_name |
218 | }, |
219 | None => quote! { |
220 | #rule_name |
221 | }, |
222 | } |
223 | }); |
224 | |
225 | let grammar_doc = &doc_comment.grammar_doc; |
226 | let mut result = if grammar_doc.is_empty() { |
227 | quote! { |
228 | #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] |
229 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
230 | } |
231 | } else { |
232 | quote! { |
233 | #[doc = #grammar_doc] |
234 | #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] |
235 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
236 | } |
237 | }; |
238 | if non_exhaustive { |
239 | result.append_all(quote! { |
240 | #[non_exhaustive] |
241 | }); |
242 | } |
243 | result.append_all(quote! { |
244 | pub enum Rule |
245 | }); |
246 | if uses_eoi { |
247 | result.append_all(quote! { |
248 | { |
                #[doc = "End-of-input"]
250 | EOI, |
251 | #( #rule_variants ),* |
252 | } |
253 | }); |
254 | } else { |
255 | result.append_all(quote! { |
256 | { |
257 | #( #rule_variants ),* |
258 | } |
259 | }) |
260 | }; |
261 | |
262 | let rules = rules.iter().map(|rule| { |
        let rule_name = format_ident!("r#{}", rule.name);
264 | quote! { #rule_name } |
265 | }); |
266 | |
267 | result.append_all(quote! { |
268 | impl Rule { |
269 | pub fn all_rules() -> &'static[Rule] { |
270 | &[ #(Rule::#rules), * ] |
271 | } |
272 | } |
273 | }); |
274 | |
275 | result |
276 | } |
277 | |
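/// Generates the `match` arms that dispatch each `Rule` variant to its generated parsing
/// function inside the `rules` module.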
278 | fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { |
    let mut rules: Vec<TokenStream> = rules
        .iter()
        .map(|rule| {
            let rule = format_ident!("r#{}", rule.name);
283 | |
284 | quote! { |
285 | Rule::#rule => rules::#rule(state) |
286 | } |
287 | }) |
288 | .collect(); |
289 | |
290 | if uses_eoi { |
291 | rules.push(quote! { |
292 | Rule::EOI => rules::EOI(state) |
293 | }); |
294 | } |
295 | |
296 | quote! { |
297 | #( #rules ),* |
298 | } |
299 | } |
300 | |
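/// Generates the parsing function for a single rule. The rule type determines the wrapper:
/// normal rules report themselves via `state.rule`, silent rules don't, and the atomic
/// variants additionally switch the parser's atomicity before evaluating the expression.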
301 | fn generate_rule(rule: OptimizedRule) -> TokenStream { |
    let name = format_ident!("r#{}", rule.name);
303 | let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic { |
304 | generate_expr_atomic(rule.expr) |
305 | } else if rule.name == "WHITESPACE" || rule.name == "COMMENT" { |
306 | let atomic = generate_expr_atomic(rule.expr); |
307 | |
308 | quote! { |
309 | state.atomic(::pest::Atomicity::Atomic, |state| { |
310 | #atomic |
311 | }) |
312 | } |
313 | } else { |
314 | generate_expr(rule.expr) |
315 | }; |
316 | |
317 | let box_ty = box_type(); |
318 | |
319 | match rule.ty { |
320 | RuleType::Normal => quote! { |
321 | #[inline] |
322 | #[allow(non_snake_case, unused_variables)] |
323 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
324 | state.rule(Rule::#name, |state| { |
325 | #expr |
326 | }) |
327 | } |
328 | }, |
329 | RuleType::Silent => quote! { |
330 | #[inline] |
331 | #[allow(non_snake_case, unused_variables)] |
332 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
333 | #expr |
334 | } |
335 | }, |
336 | RuleType::Atomic => quote! { |
337 | #[inline] |
338 | #[allow(non_snake_case, unused_variables)] |
339 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
340 | state.rule(Rule::#name, |state| { |
341 | state.atomic(::pest::Atomicity::Atomic, |state| { |
342 | #expr |
343 | }) |
344 | }) |
345 | } |
346 | }, |
347 | RuleType::CompoundAtomic => quote! { |
348 | #[inline] |
349 | #[allow(non_snake_case, unused_variables)] |
350 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
351 | state.atomic(::pest::Atomicity::CompoundAtomic, |state| { |
352 | state.rule(Rule::#name, |state| { |
353 | #expr |
354 | }) |
355 | }) |
356 | } |
357 | }, |
358 | RuleType::NonAtomic => quote! { |
359 | #[inline] |
360 | #[allow(non_snake_case, unused_variables)] |
361 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
362 | state.atomic(::pest::Atomicity::NonAtomic, |state| { |
363 | state.rule(Rule::#name, |state| { |
364 | #expr |
365 | }) |
366 | }) |
367 | } |
368 | }, |
369 | } |
370 | } |
371 | |
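/// Generates `hidden::skip`, which consumes implicit `WHITESPACE` and `COMMENT` between
/// tokens. Skipping only happens at `NonAtomic` atomicity; otherwise the state is passed
/// through unchanged.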
372 | fn generate_skip(rules: &[OptimizedRule]) -> TokenStream { |
    let whitespace = rules.iter().any(|rule| rule.name == "WHITESPACE");
    let comment = rules.iter().any(|rule| rule.name == "COMMENT");
375 | |
376 | match (whitespace, comment) { |
377 | (false, false) => generate_rule!(skip, Ok(state)), |
378 | (true, false) => generate_rule!( |
379 | skip, |
380 | if state.atomicity() == ::pest::Atomicity::NonAtomic { |
381 | state.repeat(|state| super::visible::WHITESPACE(state)) |
382 | } else { |
383 | Ok(state) |
384 | } |
385 | ), |
386 | (false, true) => generate_rule!( |
387 | skip, |
388 | if state.atomicity() == ::pest::Atomicity::NonAtomic { |
389 | state.repeat(|state| super::visible::COMMENT(state)) |
390 | } else { |
391 | Ok(state) |
392 | } |
393 | ), |
394 | (true, true) => generate_rule!( |
395 | skip, |
396 | if state.atomicity() == ::pest::Atomicity::NonAtomic { |
397 | state.sequence(|state| { |
398 | state |
399 | .repeat(|state| super::visible::WHITESPACE(state)) |
400 | .and_then(|state| { |
401 | state.repeat(|state| { |
402 | state.sequence(|state| { |
403 | super::visible::COMMENT(state).and_then(|state| { |
404 | state.repeat(|state| super::visible::WHITESPACE(state)) |
405 | }) |
406 | }) |
407 | }) |
408 | }) |
409 | }) |
410 | } else { |
411 | Ok(state) |
412 | } |
413 | ), |
414 | } |
415 | } |
416 | |
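/// Generates the parsing expression for a non-atomic rule, inserting `hidden::skip` calls
/// between sequence elements and repetition iterations.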
417 | fn generate_expr(expr: OptimizedExpr) -> TokenStream { |
418 | match expr { |
419 | OptimizedExpr::Str(string) => { |
420 | quote! { |
421 | state.match_string(#string) |
422 | } |
423 | } |
424 | OptimizedExpr::Insens(string) => { |
425 | quote! { |
426 | state.match_insensitive(#string) |
427 | } |
428 | } |
429 | OptimizedExpr::Range(start, end) => { |
430 | let start = start.chars().next().unwrap(); |
431 | let end = end.chars().next().unwrap(); |
432 | |
433 | quote! { |
434 | state.match_range(#start..#end) |
435 | } |
436 | } |
437 | OptimizedExpr::Ident(ident) => { |
            let ident = format_ident!("r#{}", ident);
439 | quote! { self::#ident(state) } |
440 | } |
441 | OptimizedExpr::PeekSlice(start, end_) => { |
442 | let end = QuoteOption(end_); |
443 | quote! { |
444 | state.stack_match_peek_slice(#start, #end, ::pest::MatchDir::BottomToTop) |
445 | } |
446 | } |
447 | OptimizedExpr::PosPred(expr) => { |
448 | let expr = generate_expr(*expr); |
449 | |
450 | quote! { |
451 | state.lookahead(true, |state| { |
452 | #expr |
453 | }) |
454 | } |
455 | } |
456 | OptimizedExpr::NegPred(expr) => { |
457 | let expr = generate_expr(*expr); |
458 | |
459 | quote! { |
460 | state.lookahead(false, |state| { |
461 | #expr |
462 | }) |
463 | } |
464 | } |
465 | OptimizedExpr::Seq(lhs, rhs) => { |
466 | let head = generate_expr(*lhs); |
467 | let mut tail = vec![]; |
468 | let mut current = *rhs; |
469 | |
470 | while let OptimizedExpr::Seq(lhs, rhs) = current { |
471 | tail.push(generate_expr(*lhs)); |
472 | current = *rhs; |
473 | } |
474 | tail.push(generate_expr(current)); |
475 | |
476 | quote! { |
477 | state.sequence(|state| { |
478 | #head |
479 | #( |
480 | .and_then(|state| { |
481 | super::hidden::skip(state) |
482 | }).and_then(|state| { |
483 | #tail |
484 | }) |
485 | )* |
486 | }) |
487 | } |
488 | } |
489 | OptimizedExpr::Choice(lhs, rhs) => { |
490 | let head = generate_expr(*lhs); |
491 | let mut tail = vec![]; |
492 | let mut current = *rhs; |
493 | |
494 | while let OptimizedExpr::Choice(lhs, rhs) = current { |
495 | tail.push(generate_expr(*lhs)); |
496 | current = *rhs; |
497 | } |
498 | tail.push(generate_expr(current)); |
499 | |
500 | quote! { |
501 | #head |
502 | #( |
503 | .or_else(|state| { |
504 | #tail |
505 | }) |
506 | )* |
507 | } |
508 | } |
509 | OptimizedExpr::Opt(expr) => { |
510 | let expr = generate_expr(*expr); |
511 | |
512 | quote! { |
513 | state.optional(|state| { |
514 | #expr |
515 | }) |
516 | } |
517 | } |
518 | OptimizedExpr::Rep(expr) => { |
519 | let expr = generate_expr(*expr); |
520 | |
521 | quote! { |
522 | state.sequence(|state| { |
523 | state.optional(|state| { |
524 | #expr.and_then(|state| { |
525 | state.repeat(|state| { |
526 | state.sequence(|state| { |
527 | super::hidden::skip( |
528 | state |
529 | ).and_then(|state| { |
530 | #expr |
531 | }) |
532 | }) |
533 | }) |
534 | }) |
535 | }) |
536 | }) |
537 | } |
538 | } |
        #[cfg(feature = "grammar-extras")]
540 | OptimizedExpr::RepOnce(expr) => { |
541 | let expr = generate_expr(*expr); |
542 | |
543 | quote! { |
544 | state.sequence(|state| { |
545 | #expr.and_then(|state| { |
546 | state.repeat(|state| { |
547 | state.sequence(|state| { |
548 | super::hidden::skip( |
549 | state |
550 | ).and_then(|state| { |
551 | #expr |
552 | }) |
553 | }) |
554 | }) |
555 | }) |
556 | }) |
557 | } |
558 | } |
559 | OptimizedExpr::Skip(strings) => { |
560 | quote! { |
561 | let strings = [#(#strings),*]; |
562 | |
563 | state.skip_until(&strings) |
564 | } |
565 | } |
566 | OptimizedExpr::Push(expr) => { |
567 | let expr = generate_expr(*expr); |
568 | |
569 | quote! { |
570 | state.stack_push(|state| #expr) |
571 | } |
572 | } |
573 | OptimizedExpr::RestoreOnErr(expr) => { |
574 | let expr = generate_expr(*expr); |
575 | |
576 | quote! { |
577 | state.restore_on_err(|state| #expr) |
578 | } |
579 | } |
        #[cfg(feature = "grammar-extras")]
581 | OptimizedExpr::NodeTag(expr, tag) => match *expr { |
582 | OptimizedExpr::Opt(expr) => { |
583 | let expr = generate_expr(*expr); |
584 | quote! { |
585 | state.optional(|state| { |
586 | #expr.and_then(|state| state.tag_node(#tag)) |
587 | }) |
588 | } |
589 | } |
590 | OptimizedExpr::Rep(expr) => { |
591 | let expr = generate_expr(*expr); |
592 | quote! { |
593 | state.sequence(|state| { |
594 | state.optional(|state| { |
595 | #expr.and_then(|state| { |
596 | state.repeat(|state| { |
597 | state.sequence(|state| { |
598 | super::hidden::skip( |
599 | state |
600 | ).and_then(|state| { |
601 | #expr.and_then(|state| state.tag_node(#tag)) |
602 | }) |
603 | }) |
604 | }) |
605 | }).and_then(|state| state.tag_node(#tag)) |
606 | }) |
607 | }) |
608 | } |
609 | } |
610 | expr => { |
611 | let expr = generate_expr(expr); |
612 | quote! { |
613 | #expr.and_then(|state| state.tag_node(#tag)) |
614 | } |
615 | } |
616 | }, |
617 | } |
618 | } |
619 | |
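/// Generates the parsing expression for an atomic rule: the same translation as
/// `generate_expr`, but without the implicit `hidden::skip` calls.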
620 | fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream { |
621 | match expr { |
622 | OptimizedExpr::Str(string) => { |
623 | quote! { |
624 | state.match_string(#string) |
625 | } |
626 | } |
627 | OptimizedExpr::Insens(string) => { |
628 | quote! { |
629 | state.match_insensitive(#string) |
630 | } |
631 | } |
632 | OptimizedExpr::Range(start, end) => { |
633 | let start = start.chars().next().unwrap(); |
634 | let end = end.chars().next().unwrap(); |
635 | |
636 | quote! { |
637 | state.match_range(#start..#end) |
638 | } |
639 | } |
640 | OptimizedExpr::Ident(ident) => { |
            let ident = format_ident!("r#{}", ident);
642 | quote! { self::#ident(state) } |
643 | } |
644 | OptimizedExpr::PeekSlice(start, end_) => { |
645 | let end = QuoteOption(end_); |
646 | quote! { |
647 | state.stack_match_peek_slice(#start, #end, ::pest::MatchDir::BottomToTop) |
648 | } |
649 | } |
650 | OptimizedExpr::PosPred(expr) => { |
651 | let expr = generate_expr_atomic(*expr); |
652 | |
653 | quote! { |
654 | state.lookahead(true, |state| { |
655 | #expr |
656 | }) |
657 | } |
658 | } |
659 | OptimizedExpr::NegPred(expr) => { |
660 | let expr = generate_expr_atomic(*expr); |
661 | |
662 | quote! { |
663 | state.lookahead(false, |state| { |
664 | #expr |
665 | }) |
666 | } |
667 | } |
668 | OptimizedExpr::Seq(lhs, rhs) => { |
669 | let head = generate_expr_atomic(*lhs); |
670 | let mut tail = vec![]; |
671 | let mut current = *rhs; |
672 | |
673 | while let OptimizedExpr::Seq(lhs, rhs) = current { |
674 | tail.push(generate_expr_atomic(*lhs)); |
675 | current = *rhs; |
676 | } |
677 | tail.push(generate_expr_atomic(current)); |
678 | |
679 | quote! { |
680 | state.sequence(|state| { |
681 | #head |
682 | #( |
683 | .and_then(|state| { |
684 | #tail |
685 | }) |
686 | )* |
687 | }) |
688 | } |
689 | } |
690 | OptimizedExpr::Choice(lhs, rhs) => { |
691 | let head = generate_expr_atomic(*lhs); |
692 | let mut tail = vec![]; |
693 | let mut current = *rhs; |
694 | |
695 | while let OptimizedExpr::Choice(lhs, rhs) = current { |
696 | tail.push(generate_expr_atomic(*lhs)); |
697 | current = *rhs; |
698 | } |
699 | tail.push(generate_expr_atomic(current)); |
700 | |
701 | quote! { |
702 | #head |
703 | #( |
704 | .or_else(|state| { |
705 | #tail |
706 | }) |
707 | )* |
708 | } |
709 | } |
710 | OptimizedExpr::Opt(expr) => { |
711 | let expr = generate_expr_atomic(*expr); |
712 | |
713 | quote! { |
714 | state.optional(|state| { |
715 | #expr |
716 | }) |
717 | } |
718 | } |
719 | OptimizedExpr::Rep(expr) => { |
720 | let expr = generate_expr_atomic(*expr); |
721 | |
722 | quote! { |
723 | state.repeat(|state| { |
724 | #expr |
725 | }) |
726 | } |
727 | } |
        #[cfg(feature = "grammar-extras")]
729 | OptimizedExpr::RepOnce(expr) => { |
730 | let expr = generate_expr_atomic(*expr); |
731 | |
732 | quote! { |
733 | state.sequence(|state| { |
734 | #expr.and_then(|state| { |
735 | state.repeat(|state| { |
736 | state.sequence(|state| { |
737 | #expr |
738 | }) |
739 | }) |
740 | }) |
741 | }) |
742 | } |
743 | } |
744 | OptimizedExpr::Skip(strings) => { |
745 | quote! { |
746 | let strings = [#(#strings),*]; |
747 | |
748 | state.skip_until(&strings) |
749 | } |
750 | } |
751 | OptimizedExpr::Push(expr) => { |
752 | let expr = generate_expr_atomic(*expr); |
753 | |
754 | quote! { |
755 | state.stack_push(|state| #expr) |
756 | } |
757 | } |
758 | OptimizedExpr::RestoreOnErr(expr) => { |
759 | let expr = generate_expr_atomic(*expr); |
760 | |
761 | quote! { |
762 | state.restore_on_err(|state| #expr) |
763 | } |
764 | } |
        #[cfg(feature = "grammar-extras")]
766 | OptimizedExpr::NodeTag(expr, tag) => match *expr { |
767 | OptimizedExpr::Opt(expr) => { |
768 | let expr = generate_expr_atomic(*expr); |
769 | |
770 | quote! { |
771 | state.optional(|state| { |
772 | #expr.and_then(|state| state.tag_node(#tag)) |
773 | }) |
774 | } |
775 | } |
776 | OptimizedExpr::Rep(expr) => { |
777 | let expr = generate_expr_atomic(*expr); |
778 | |
779 | quote! { |
780 | state.repeat(|state| { |
781 | #expr.and_then(|state| state.tag_node(#tag)) |
782 | }) |
783 | } |
784 | } |
785 | expr => { |
786 | let expr = generate_expr_atomic(expr); |
787 | quote! { |
788 | #expr.and_then(|state| state.tag_node(#tag)) |
789 | } |
790 | } |
791 | }, |
792 | } |
793 | } |
794 | |
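/// Wraps an `Option<T>` so it can be interpolated into `quote!` output as either
/// `Option::Some(..)` or `Option::None`, spelled with the path returned by `option_type`.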
795 | struct QuoteOption<T>(Option<T>); |
796 | |
797 | impl<T: ToTokens> ToTokens for QuoteOption<T> { |
798 | fn to_tokens(&self, tokens: &mut TokenStream) { |
799 | let option: TokenStream = option_type(); |
        tokens.append_all(match self.0 {
            Some(ref t) => quote! { #option::Some(#t) },
802 | None => quote! { #option::None }, |
803 | }); |
804 | } |
805 | } |
806 | |
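// The generated code spells out full paths to `Box`, `Result`, and `Option` so that it also
// compiles in `no_std` crates: with the `std` feature the `::std` paths are used, otherwise
// the `::alloc`/`::core` ones.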
807 | fn box_type() -> TokenStream { |
    #[cfg(feature = "std")]
    quote! { ::std::boxed::Box }

    #[cfg(not(feature = "std"))]
812 | quote! { ::alloc::boxed::Box } |
813 | } |
814 | |
815 | fn result_type() -> TokenStream { |
    #[cfg(feature = "std")]
    quote! { ::std::result::Result }

    #[cfg(not(feature = "std"))]
820 | quote! { ::core::result::Result } |
821 | } |
822 | |
823 | fn option_type() -> TokenStream { |
    #[cfg(feature = "std")]
    quote! { ::std::option::Option }

    #[cfg(not(feature = "std"))]
828 | quote! { ::core::option::Option } |
829 | } |
830 | |
831 | #[cfg (test)] |
832 | mod tests { |
833 | use super::*; |
834 | |
835 | use proc_macro2::Span; |
836 | use std::collections::HashMap; |
837 | use syn::Generics; |
838 | |
839 | #[test ] |
840 | fn rule_enum_simple() { |
841 | let rules = vec![OptimizedRule { |
842 | name: "f" .to_owned(), |
843 | ty: RuleType::Normal, |
844 | expr: OptimizedExpr::Ident("g" .to_owned()), |
845 | }]; |
846 | |
847 | let mut line_docs = HashMap::new(); |
848 | line_docs.insert("f" .to_owned(), "This is rule comment" .to_owned()); |
849 | |
850 | let doc_comment = &DocComment { |
            grammar_doc: "Rule doc\nhello".to_owned(),
852 | line_docs, |
853 | }; |
854 | |
855 | assert_eq!( |
856 | generate_enum(&rules, doc_comment, false, false).to_string(), |
857 | quote! { |
                #[doc = "Rule doc\nhello"]
859 | #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] |
860 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
861 | pub enum Rule { |
862 | #[doc = "This is rule comment" ] |
863 | r#f |
864 | } |
865 | impl Rule { |
866 | pub fn all_rules() -> &'static [Rule] { |
867 | &[Rule::r#f] |
868 | } |
869 | } |
870 | } |
871 | .to_string() |
872 | ); |
873 | } |
874 | |
875 | #[test ] |
876 | fn rule_empty_doc() { |
877 | let rules = vec![OptimizedRule { |
878 | name: "f" .to_owned(), |
879 | ty: RuleType::Normal, |
880 | expr: OptimizedExpr::Ident("g" .to_owned()), |
881 | }]; |
882 | |
883 | let mut line_docs = HashMap::new(); |
884 | line_docs.insert("f" .to_owned(), "This is rule comment" .to_owned()); |
885 | |
886 | let doc_comment = &DocComment { |
887 | grammar_doc: "" .to_owned(), |
888 | line_docs, |
889 | }; |
890 | |
891 | assert_eq!( |
892 | generate_enum(&rules, doc_comment, false, false).to_string(), |
893 | quote! { |
894 | #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] |
895 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
896 | pub enum Rule { |
897 | #[doc = "This is rule comment" ] |
898 | r#f |
899 | } |
900 | impl Rule { |
901 | pub fn all_rules() -> &'static [Rule] { |
902 | &[Rule::r#f] |
903 | } |
904 | } |
905 | } |
906 | .to_string() |
907 | ); |
908 | } |
909 | |
910 | #[test ] |
911 | fn sequence() { |
912 | let expr = OptimizedExpr::Seq( |
913 | Box::new(OptimizedExpr::Str("a" .to_owned())), |
914 | Box::new(OptimizedExpr::Seq( |
915 | Box::new(OptimizedExpr::Str("b" .to_owned())), |
916 | Box::new(OptimizedExpr::Seq( |
917 | Box::new(OptimizedExpr::Str("c" .to_owned())), |
918 | Box::new(OptimizedExpr::Str("d" .to_owned())), |
919 | )), |
920 | )), |
921 | ); |
922 | |
923 | assert_eq!( |
924 | generate_expr(expr).to_string(), |
925 | quote! { |
926 | state.sequence(|state| { |
927 | state.match_string("a" ).and_then(|state| { |
928 | super::hidden::skip(state) |
929 | }).and_then(|state| { |
930 | state.match_string("b" ) |
931 | }).and_then(|state| { |
932 | super::hidden::skip(state) |
933 | }).and_then(|state| { |
934 | state.match_string("c" ) |
935 | }).and_then(|state| { |
936 | super::hidden::skip(state) |
937 | }).and_then(|state| { |
938 | state.match_string("d" ) |
939 | }) |
940 | }) |
941 | } |
942 | .to_string() |
943 | ); |
944 | } |
945 | |
946 | #[test ] |
947 | fn sequence_atomic() { |
948 | let expr = OptimizedExpr::Seq( |
949 | Box::new(OptimizedExpr::Str("a" .to_owned())), |
950 | Box::new(OptimizedExpr::Seq( |
951 | Box::new(OptimizedExpr::Str("b" .to_owned())), |
952 | Box::new(OptimizedExpr::Seq( |
953 | Box::new(OptimizedExpr::Str("c" .to_owned())), |
954 | Box::new(OptimizedExpr::Str("d" .to_owned())), |
955 | )), |
956 | )), |
957 | ); |
958 | |
959 | assert_eq!( |
960 | generate_expr_atomic(expr).to_string(), |
961 | quote! { |
962 | state.sequence(|state| { |
963 | state.match_string("a" ).and_then(|state| { |
964 | state.match_string("b" ) |
965 | }).and_then(|state| { |
966 | state.match_string("c" ) |
967 | }).and_then(|state| { |
968 | state.match_string("d" ) |
969 | }) |
970 | }) |
971 | } |
972 | .to_string() |
973 | ); |
974 | } |
975 | |
976 | #[test ] |
977 | fn choice() { |
978 | let expr = OptimizedExpr::Choice( |
979 | Box::new(OptimizedExpr::Str("a" .to_owned())), |
980 | Box::new(OptimizedExpr::Choice( |
981 | Box::new(OptimizedExpr::Str("b" .to_owned())), |
982 | Box::new(OptimizedExpr::Choice( |
983 | Box::new(OptimizedExpr::Str("c" .to_owned())), |
984 | Box::new(OptimizedExpr::Str("d" .to_owned())), |
985 | )), |
986 | )), |
987 | ); |
988 | |
989 | assert_eq!( |
990 | generate_expr(expr).to_string(), |
991 | quote! { |
992 | state.match_string("a" ).or_else(|state| { |
993 | state.match_string("b" ) |
994 | }).or_else(|state| { |
995 | state.match_string("c" ) |
996 | }).or_else(|state| { |
997 | state.match_string("d" ) |
998 | }) |
999 | } |
1000 | .to_string() |
1001 | ); |
1002 | } |
1003 | |
1004 | #[test ] |
1005 | fn choice_atomic() { |
1006 | let expr = OptimizedExpr::Choice( |
1007 | Box::new(OptimizedExpr::Str("a" .to_owned())), |
1008 | Box::new(OptimizedExpr::Choice( |
1009 | Box::new(OptimizedExpr::Str("b" .to_owned())), |
1010 | Box::new(OptimizedExpr::Choice( |
1011 | Box::new(OptimizedExpr::Str("c" .to_owned())), |
1012 | Box::new(OptimizedExpr::Str("d" .to_owned())), |
1013 | )), |
1014 | )), |
1015 | ); |
1016 | |
1017 | assert_eq!( |
1018 | generate_expr_atomic(expr).to_string(), |
1019 | quote! { |
1020 | state.match_string("a" ).or_else(|state| { |
1021 | state.match_string("b" ) |
1022 | }).or_else(|state| { |
1023 | state.match_string("c" ) |
1024 | }).or_else(|state| { |
1025 | state.match_string("d" ) |
1026 | }) |
1027 | } |
1028 | .to_string() |
1029 | ); |
1030 | } |
1031 | |
1032 | #[test ] |
1033 | fn skip() { |
1034 | let expr = OptimizedExpr::Skip(vec!["a" .to_owned(), "b" .to_owned()]); |
1035 | |
1036 | assert_eq!( |
1037 | generate_expr_atomic(expr).to_string(), |
1038 | quote! { |
1039 | let strings = ["a" , "b" ]; |
1040 | |
1041 | state.skip_until(&strings) |
1042 | } |
1043 | .to_string() |
1044 | ); |
1045 | } |
1046 | |
1047 | #[test ] |
1048 | fn expr_complex() { |
1049 | let expr = OptimizedExpr::Choice( |
1050 | Box::new(OptimizedExpr::Ident("a" .to_owned())), |
1051 | Box::new(OptimizedExpr::Seq( |
1052 | Box::new(OptimizedExpr::Range("a" .to_owned(), "b" .to_owned())), |
1053 | Box::new(OptimizedExpr::Seq( |
1054 | Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Rep( |
1055 | Box::new(OptimizedExpr::Insens("b" .to_owned())), |
1056 | )))), |
1057 | Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Opt( |
1058 | Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Choice( |
1059 | Box::new(OptimizedExpr::Str("c" .to_owned())), |
1060 | Box::new(OptimizedExpr::Str("d" .to_owned())), |
1061 | )))), |
1062 | )))), |
1063 | )), |
1064 | )), |
1065 | ); |
1066 | |
1067 | let sequence = quote! { |
1068 | state.sequence(|state| { |
1069 | super::hidden::skip(state).and_then( |
1070 | |state| { |
1071 | state.match_insensitive("b" ) |
1072 | } |
1073 | ) |
1074 | }) |
1075 | }; |
1076 | let repeat = quote! { |
1077 | state.repeat(|state| { |
1078 | state.sequence(|state| { |
1079 | super::hidden::skip(state).and_then(|state| { |
1080 | state.match_string("c" ) |
1081 | .or_else(|state| { |
1082 | state.match_string("d" ) |
1083 | }) |
1084 | }) |
1085 | }) |
1086 | }) |
1087 | }; |
1088 | assert_eq!( |
1089 | generate_expr(expr).to_string(), |
1090 | quote! { |
1091 | self::r#a(state).or_else(|state| { |
1092 | state.sequence(|state| { |
1093 | state.match_range('a' ..'b' ).and_then(|state| { |
1094 | super::hidden::skip(state) |
1095 | }).and_then(|state| { |
1096 | state.lookahead(false, |state| { |
1097 | state.sequence(|state| { |
1098 | state.optional(|state| { |
1099 | state.match_insensitive( |
1100 | "b" |
1101 | ).and_then(|state| { |
1102 | state.repeat(|state| { |
1103 | #sequence |
1104 | }) |
1105 | }) |
1106 | }) |
1107 | }) |
1108 | }) |
1109 | }).and_then(|state| { |
1110 | super::hidden::skip(state) |
1111 | }).and_then(|state| { |
1112 | state.lookahead(true, |state| { |
1113 | state.optional(|state| { |
1114 | state.sequence(|state| { |
1115 | state.optional(|state| { |
1116 | state.match_string("c" ) |
1117 | .or_else(|state| { |
1118 | state.match_string("d" ) |
1119 | }).and_then(|state| { |
1120 | #repeat |
1121 | }) |
1122 | }) |
1123 | }) |
1124 | }) |
1125 | }) |
1126 | }) |
1127 | }) |
1128 | }) |
1129 | } |
1130 | .to_string() |
1131 | ); |
1132 | } |
1133 | |
1134 | #[test ] |
1135 | fn expr_complex_atomic() { |
1136 | let expr = OptimizedExpr::Choice( |
1137 | Box::new(OptimizedExpr::Ident("a" .to_owned())), |
1138 | Box::new(OptimizedExpr::Seq( |
1139 | Box::new(OptimizedExpr::Range("a" .to_owned(), "b" .to_owned())), |
1140 | Box::new(OptimizedExpr::Seq( |
1141 | Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Rep( |
1142 | Box::new(OptimizedExpr::Insens("b" .to_owned())), |
1143 | )))), |
1144 | Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Opt( |
1145 | Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Choice( |
1146 | Box::new(OptimizedExpr::Str("c" .to_owned())), |
1147 | Box::new(OptimizedExpr::Str("d" .to_owned())), |
1148 | )))), |
1149 | )))), |
1150 | )), |
1151 | )), |
1152 | ); |
1153 | |
1154 | assert_eq!( |
1155 | generate_expr_atomic(expr).to_string(), |
1156 | quote! { |
1157 | self::r#a(state).or_else(|state| { |
1158 | state.sequence(|state| { |
1159 | state.match_range('a' ..'b' ).and_then(|state| { |
1160 | state.lookahead(false, |state| { |
1161 | state.repeat(|state| { |
1162 | state.match_insensitive("b" ) |
1163 | }) |
1164 | }) |
1165 | }).and_then(|state| { |
1166 | state.lookahead(true, |state| { |
1167 | state.optional(|state| { |
1168 | state.repeat(|state| { |
1169 | state.match_string("c" ) |
1170 | .or_else(|state| { |
1171 | state.match_string("d" ) |
1172 | }) |
1173 | }) |
1174 | }) |
1175 | }) |
1176 | }) |
1177 | }) |
1178 | }) |
1179 | } |
1180 | .to_string() |
1181 | ); |
1182 | } |
1183 | |
1184 | #[test ] |
1185 | fn test_generate_complete() { |
1186 | let name = Ident::new("MyParser" , Span::call_site()); |
1187 | let generics = Generics::default(); |
1188 | |
1189 | let rules = vec![ |
1190 | OptimizedRule { |
1191 | name: "a" .to_owned(), |
1192 | ty: RuleType::Silent, |
1193 | expr: OptimizedExpr::Str("b" .to_owned()), |
1194 | }, |
1195 | OptimizedRule { |
1196 | name: "if" .to_owned(), |
1197 | ty: RuleType::Silent, |
1198 | expr: OptimizedExpr::Ident("a" .to_owned()), |
1199 | }, |
1200 | ]; |
1201 | |
1202 | let mut line_docs = HashMap::new(); |
1203 | line_docs.insert("if" .to_owned(), "If statement" .to_owned()); |
1204 | |
1205 | let doc_comment = &DocComment { |
1206 | line_docs, |
            grammar_doc: "This is Rule doc\nThis is second line".to_owned(),
1208 | }; |
1209 | |
1210 | let defaults = vec!["ANY" ]; |
1211 | let result = result_type(); |
1212 | let box_ty = box_type(); |
1213 | let current_dir = std::env::current_dir().expect("Unable to get current directory" ); |
1214 | |
1215 | let base_path = current_dir.join("base.pest" ).to_str().unwrap().to_string(); |
1216 | let test_path = current_dir.join("test.pest" ).to_str().unwrap().to_string(); |
1217 | let parsed_derive = ParsedDerive { |
1218 | name, |
1219 | generics, |
1220 | non_exhaustive: false, |
1221 | }; |
1222 | assert_eq!( |
1223 | generate(parsed_derive, vec![PathBuf::from("base.pest" ), PathBuf::from("test.pest" )], rules, defaults, doc_comment, true).to_string(), |
1224 | quote! { |
1225 | #[allow(non_upper_case_globals)] |
1226 | const _PEST_GRAMMAR_MyParser: [&'static str; 2usize] = [include_str!(#base_path), include_str!(#test_path)]; |
1227 | |
                #[doc = "This is Rule doc\nThis is second line"]
1229 | #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] |
1230 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1231 | pub enum Rule { |
1232 | r#a, |
1233 | #[doc = "If statement" ] |
1234 | r#if |
1235 | } |
1236 | impl Rule { |
1237 | pub fn all_rules() -> &'static [Rule] { |
1238 | &[Rule::r#a, Rule::r#if] |
1239 | } |
1240 | } |
1241 | |
1242 | #[allow(clippy::all)] |
1243 | impl ::pest::Parser<Rule> for MyParser { |
1244 | fn parse<'i>( |
1245 | rule: Rule, |
1246 | input: &'i str |
1247 | ) -> #result< |
1248 | ::pest::iterators::Pairs<'i, Rule>, |
1249 | ::pest::error::Error<Rule> |
1250 | > { |
1251 | mod rules { |
1252 | #![allow(clippy::upper_case_acronyms)] |
1253 | pub mod hidden { |
1254 | use super::super::Rule; |
1255 | |
1256 | #[inline] |
1257 | #[allow(dead_code, non_snake_case, unused_variables)] |
1258 | pub fn skip(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
1259 | Ok(state) |
1260 | } |
1261 | } |
1262 | |
1263 | pub mod visible { |
1264 | use super::super::Rule; |
1265 | |
1266 | #[inline] |
1267 | #[allow(non_snake_case, unused_variables)] |
1268 | pub fn r#a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
1269 | state.match_string("b" ) |
1270 | } |
1271 | |
1272 | #[inline] |
1273 | #[allow(non_snake_case, unused_variables)] |
1274 | pub fn r#if(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
1275 | self::r#a(state) |
1276 | } |
1277 | |
1278 | #[inline] |
1279 | #[allow(dead_code, non_snake_case, unused_variables)] |
1280 | pub fn ANY(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
1281 | state.skip(1) |
1282 | } |
1283 | } |
1284 | |
1285 | pub use self::visible::*; |
1286 | } |
1287 | |
1288 | ::pest::state(input, |state| { |
1289 | match rule { |
1290 | Rule::r#a => rules::r#a(state), |
1291 | Rule::r#if => rules::r#if(state) |
1292 | } |
1293 | }) |
1294 | } |
1295 | } |
1296 | }.to_string() |
1297 | ); |
1298 | } |
1299 | } |
1300 | |