// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
9 | |
10 | use std::path::PathBuf; |
11 | |
12 | use proc_macro2::TokenStream; |
13 | use quote::{ToTokens, TokenStreamExt}; |
14 | use syn::{self, Generics, Ident}; |
15 | |
16 | use pest::unicode::unicode_property_names; |
17 | use pest_meta::ast::*; |
18 | use pest_meta::optimizer::*; |
19 | |
20 | use crate::docs::DocComment; |
21 | |
22 | pub(crate) fn generate( |
23 | name: Ident, |
24 | generics: &Generics, |
25 | paths: Vec<PathBuf>, |
26 | rules: Vec<OptimizedRule>, |
27 | defaults: Vec<&str>, |
28 | doc_comment: &DocComment, |
29 | include_grammar: bool, |
30 | ) -> TokenStream { |
31 | let uses_eoi = defaults.iter().any(|name| *name == "EOI" ); |
32 | |
33 | let builtins = generate_builtin_rules(); |
34 | let include_fix = if include_grammar { |
35 | generate_include(&name, paths) |
36 | } else { |
37 | quote!() |
38 | }; |
39 | let rule_enum = generate_enum(&rules, doc_comment, uses_eoi); |
40 | let patterns = generate_patterns(&rules, uses_eoi); |
41 | let skip = generate_skip(&rules); |
42 | |
43 | let mut rules: Vec<_> = rules.into_iter().map(generate_rule).collect(); |
44 | rules.extend(builtins.into_iter().filter_map(|(builtin, tokens)| { |
45 | if defaults.contains(&builtin) { |
46 | Some(tokens) |
47 | } else { |
48 | None |
49 | } |
50 | })); |
51 | |
52 | let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); |
53 | |
54 | let result = result_type(); |
55 | |
56 | let parser_impl = quote! { |
57 | #[allow(clippy::all)] |
58 | impl #impl_generics ::pest::Parser<Rule> for #name #ty_generics #where_clause { |
59 | fn parse<'i>( |
60 | rule: Rule, |
61 | input: &'i str |
62 | ) -> #result< |
63 | ::pest::iterators::Pairs<'i, Rule>, |
64 | ::pest::error::Error<Rule> |
65 | > { |
66 | mod rules { |
67 | #![allow(clippy::upper_case_acronyms)] |
68 | pub mod hidden { |
69 | use super::super::Rule; |
70 | #skip |
71 | } |
72 | |
73 | pub mod visible { |
74 | use super::super::Rule; |
75 | #( #rules )* |
76 | } |
77 | |
78 | pub use self::visible::*; |
79 | } |
80 | |
81 | ::pest::state(input, |state| { |
82 | match rule { |
83 | #patterns |
84 | } |
85 | }) |
86 | } |
87 | } |
88 | }; |
89 | |
90 | quote! { |
91 | #include_fix |
92 | #rule_enum |
93 | #parser_impl |
94 | } |
95 | } |
96 | |
97 | // Note: All builtin rules should be validated as pest builtins in meta/src/validator.rs. |
98 | // Some should also be keywords. |
99 | fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { |
100 | let mut builtins = Vec::new(); |
101 | |
102 | insert_builtin!(builtins, ANY, state.skip(1)); |
103 | insert_builtin!( |
104 | builtins, |
105 | EOI, |
106 | state.rule(Rule::EOI, |state| state.end_of_input()) |
107 | ); |
108 | insert_builtin!(builtins, SOI, state.start_of_input()); |
109 | insert_builtin!(builtins, PEEK, state.stack_peek()); |
110 | insert_builtin!(builtins, PEEK_ALL, state.stack_match_peek()); |
111 | insert_builtin!(builtins, POP, state.stack_pop()); |
112 | insert_builtin!(builtins, POP_ALL, state.stack_match_pop()); |
113 | insert_builtin!(builtins, DROP, state.stack_drop()); |
114 | |
115 | insert_builtin!(builtins, ASCII_DIGIT, state.match_range('0' ..'9' )); |
116 | insert_builtin!(builtins, ASCII_NONZERO_DIGIT, state.match_range('1' ..'9' )); |
117 | insert_builtin!(builtins, ASCII_BIN_DIGIT, state.match_range('0' ..'1' )); |
118 | insert_builtin!(builtins, ASCII_OCT_DIGIT, state.match_range('0' ..'7' )); |
119 | insert_builtin!( |
120 | builtins, |
121 | ASCII_HEX_DIGIT, |
122 | state |
123 | .match_range('0' ..'9' ) |
124 | .or_else(|state| state.match_range('a' ..'f' )) |
125 | .or_else(|state| state.match_range('A' ..'F' )) |
126 | ); |
127 | insert_builtin!(builtins, ASCII_ALPHA_LOWER, state.match_range('a' ..'z' )); |
128 | insert_builtin!(builtins, ASCII_ALPHA_UPPER, state.match_range('A' ..'Z' )); |
129 | insert_builtin!( |
130 | builtins, |
131 | ASCII_ALPHA, |
132 | state |
133 | .match_range('a' ..'z' ) |
134 | .or_else(|state| state.match_range('A' ..'Z' )) |
135 | ); |
136 | insert_builtin!( |
137 | builtins, |
138 | ASCII_ALPHANUMERIC, |
139 | state |
140 | .match_range('a' ..'z' ) |
141 | .or_else(|state| state.match_range('A' ..'Z' )) |
142 | .or_else(|state| state.match_range('0' ..'9' )) |
143 | ); |
144 | insert_builtin!(builtins, ASCII, state.match_range(' \x00' ..' \x7f' )); |
145 | insert_builtin!( |
146 | builtins, |
147 | NEWLINE, |
148 | state |
149 | .match_string(" \n" ) |
150 | .or_else(|state| state.match_string(" \r\n" )) |
151 | .or_else(|state| state.match_string(" \r" )) |
152 | ); |
153 | |
154 | let box_ty = box_type(); |
155 | |
156 | for property in unicode_property_names() { |
157 | let property_ident: Ident = syn::parse_str(property).unwrap(); |
158 | // insert manually for #property substitution |
159 | builtins.push((property, quote! { |
160 | #[inline] |
161 | #[allow(dead_code, non_snake_case, unused_variables)] |
162 | fn #property_ident(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
163 | state.match_char_by(::pest::unicode::#property_ident) |
164 | } |
165 | })); |
166 | } |
167 | builtins |
168 | } |
169 | |
170 | /// Generate Rust `include_str!` for grammar files, then Cargo will watch changes in grammars. |
171 | fn generate_include(name: &Ident, paths: Vec<PathBuf>) -> TokenStream { |
172 | let const_name = format_ident!("_PEST_GRAMMAR_ {}" , name); |
173 | // Need to make this relative to the current directory since the path to the file |
174 | // is derived from the CARGO_MANIFEST_DIR environment variable |
175 | let current_dir = std::env::current_dir().expect("Unable to get current directory" ); |
176 | |
177 | let include_tokens = paths.iter().map(|path| { |
178 | let path = path.to_str().expect("non-Unicode path" ); |
179 | |
180 | let relative_path = current_dir |
181 | .join(path) |
182 | .to_str() |
183 | .expect("path contains invalid unicode" ) |
184 | .to_string(); |
185 | |
186 | quote! { |
187 | include_str!(#relative_path) |
188 | } |
189 | }); |
190 | |
191 | let len = include_tokens.len(); |
192 | quote! { |
193 | #[allow(non_upper_case_globals)] |
194 | const #const_name: [&'static str; #len] = [ |
195 | #(#include_tokens),* |
196 | ]; |
197 | } |
198 | } |
199 | |
200 | fn generate_enum(rules: &[OptimizedRule], doc_comment: &DocComment, uses_eoi: bool) -> TokenStream { |
201 | let rules = rules.iter().map(|rule| { |
202 | let rule_name = format_ident!("r# {}" , rule.name); |
203 | |
204 | match doc_comment.line_docs.get(&rule.name) { |
205 | Some(doc) => quote! { |
206 | #[doc = #doc] |
207 | #rule_name |
208 | }, |
209 | None => quote! { |
210 | #rule_name |
211 | }, |
212 | } |
213 | }); |
214 | |
215 | let grammar_doc = &doc_comment.grammar_doc; |
216 | if uses_eoi { |
217 | quote! { |
218 | #[doc = #grammar_doc] |
219 | #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] |
220 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
221 | pub enum Rule { |
222 | EOI, |
223 | #( #rules ),* |
224 | } |
225 | } |
226 | } else { |
227 | quote! { |
228 | #[doc = #grammar_doc] |
229 | #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] |
230 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
231 | pub enum Rule { |
232 | #( #rules ),* |
233 | } |
234 | } |
235 | } |
236 | } |
237 | |
238 | fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { |
239 | let mut rules: Vec<TokenStream> = rulesimpl Iterator |
240 | .iter() |
241 | .map(|rule: &OptimizedRule| { |
242 | let rule: Ident = format_ident!("r# {}" , rule.name); |
243 | |
244 | quote! { |
245 | Rule::#rule => rules::#rule(state) |
246 | } |
247 | }) |
248 | .collect(); |
249 | |
250 | if uses_eoi { |
251 | rules.push(quote! { |
252 | Rule::EOI => rules::EOI(state) |
253 | }); |
254 | } |
255 | |
256 | quote! { |
257 | #( #rules ),* |
258 | } |
259 | } |
260 | |
261 | fn generate_rule(rule: OptimizedRule) -> TokenStream { |
262 | let name = format_ident!("r# {}" , rule.name); |
263 | let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic { |
264 | generate_expr_atomic(rule.expr) |
265 | } else if rule.name == "WHITESPACE" || rule.name == "COMMENT" { |
266 | let atomic = generate_expr_atomic(rule.expr); |
267 | |
268 | quote! { |
269 | state.atomic(::pest::Atomicity::Atomic, |state| { |
270 | #atomic |
271 | }) |
272 | } |
273 | } else { |
274 | generate_expr(rule.expr) |
275 | }; |
276 | |
277 | let box_ty = box_type(); |
278 | |
279 | match rule.ty { |
280 | RuleType::Normal => quote! { |
281 | #[inline] |
282 | #[allow(non_snake_case, unused_variables)] |
283 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
284 | state.rule(Rule::#name, |state| { |
285 | #expr |
286 | }) |
287 | } |
288 | }, |
289 | RuleType::Silent => quote! { |
290 | #[inline] |
291 | #[allow(non_snake_case, unused_variables)] |
292 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
293 | #expr |
294 | } |
295 | }, |
296 | RuleType::Atomic => quote! { |
297 | #[inline] |
298 | #[allow(non_snake_case, unused_variables)] |
299 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
300 | state.rule(Rule::#name, |state| { |
301 | state.atomic(::pest::Atomicity::Atomic, |state| { |
302 | #expr |
303 | }) |
304 | }) |
305 | } |
306 | }, |
307 | RuleType::CompoundAtomic => quote! { |
308 | #[inline] |
309 | #[allow(non_snake_case, unused_variables)] |
310 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
311 | state.atomic(::pest::Atomicity::CompoundAtomic, |state| { |
312 | state.rule(Rule::#name, |state| { |
313 | #expr |
314 | }) |
315 | }) |
316 | } |
317 | }, |
318 | RuleType::NonAtomic => quote! { |
319 | #[inline] |
320 | #[allow(non_snake_case, unused_variables)] |
321 | pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { |
322 | state.atomic(::pest::Atomicity::NonAtomic, |state| { |
323 | state.rule(Rule::#name, |state| { |
324 | #expr |
325 | }) |
326 | }) |
327 | } |
328 | }, |
329 | } |
330 | } |
331 | |
332 | fn generate_skip(rules: &[OptimizedRule]) -> TokenStream { |
333 | let whitespace = rules.iter().any(|rule| rule.name == "WHITESPACE" ); |
334 | let comment = rules.iter().any(|rule| rule.name == "COMMENT" ); |
335 | |
336 | match (whitespace, comment) { |
337 | (false, false) => generate_rule!(skip, Ok(state)), |
338 | (true, false) => generate_rule!( |
339 | skip, |
340 | if state.atomicity() == ::pest::Atomicity::NonAtomic { |
341 | state.repeat(|state| super::visible::WHITESPACE(state)) |
342 | } else { |
343 | Ok(state) |
344 | } |
345 | ), |
346 | (false, true) => generate_rule!( |
347 | skip, |
348 | if state.atomicity() == ::pest::Atomicity::NonAtomic { |
349 | state.repeat(|state| super::visible::COMMENT(state)) |
350 | } else { |
351 | Ok(state) |
352 | } |
353 | ), |
354 | (true, true) => generate_rule!( |
355 | skip, |
356 | if state.atomicity() == ::pest::Atomicity::NonAtomic { |
357 | state.sequence(|state| { |
358 | state |
359 | .repeat(|state| super::visible::WHITESPACE(state)) |
360 | .and_then(|state| { |
361 | state.repeat(|state| { |
362 | state.sequence(|state| { |
363 | super::visible::COMMENT(state).and_then(|state| { |
364 | state.repeat(|state| super::visible::WHITESPACE(state)) |
365 | }) |
366 | }) |
367 | }) |
368 | }) |
369 | }) |
370 | } else { |
371 | Ok(state) |
372 | } |
373 | ), |
374 | } |
375 | } |
376 | |
377 | fn generate_expr(expr: OptimizedExpr) -> TokenStream { |
378 | match expr { |
379 | OptimizedExpr::Str(string) => { |
380 | quote! { |
381 | state.match_string(#string) |
382 | } |
383 | } |
384 | OptimizedExpr::Insens(string) => { |
385 | quote! { |
386 | state.match_insensitive(#string) |
387 | } |
388 | } |
389 | OptimizedExpr::Range(start, end) => { |
390 | let start = start.chars().next().unwrap(); |
391 | let end = end.chars().next().unwrap(); |
392 | |
393 | quote! { |
394 | state.match_range(#start..#end) |
395 | } |
396 | } |
397 | OptimizedExpr::Ident(ident) => { |
398 | let ident = format_ident!("r# {}" , ident); |
399 | quote! { self::#ident(state) } |
400 | } |
401 | OptimizedExpr::PeekSlice(start, end_) => { |
402 | let end = QuoteOption(end_); |
403 | quote! { |
404 | state.stack_match_peek_slice(#start, #end, ::pest::MatchDir::BottomToTop) |
405 | } |
406 | } |
407 | OptimizedExpr::PosPred(expr) => { |
408 | let expr = generate_expr(*expr); |
409 | |
410 | quote! { |
411 | state.lookahead(true, |state| { |
412 | #expr |
413 | }) |
414 | } |
415 | } |
416 | OptimizedExpr::NegPred(expr) => { |
417 | let expr = generate_expr(*expr); |
418 | |
419 | quote! { |
420 | state.lookahead(false, |state| { |
421 | #expr |
422 | }) |
423 | } |
424 | } |
425 | OptimizedExpr::Seq(lhs, rhs) => { |
426 | let head = generate_expr(*lhs); |
427 | let mut tail = vec![]; |
428 | let mut current = *rhs; |
429 | |
430 | while let OptimizedExpr::Seq(lhs, rhs) = current { |
431 | tail.push(generate_expr(*lhs)); |
432 | current = *rhs; |
433 | } |
434 | tail.push(generate_expr(current)); |
435 | |
436 | quote! { |
437 | state.sequence(|state| { |
438 | #head |
439 | #( |
440 | .and_then(|state| { |
441 | super::hidden::skip(state) |
442 | }).and_then(|state| { |
443 | #tail |
444 | }) |
445 | )* |
446 | }) |
447 | } |
448 | } |
449 | OptimizedExpr::Choice(lhs, rhs) => { |
450 | let head = generate_expr(*lhs); |
451 | let mut tail = vec![]; |
452 | let mut current = *rhs; |
453 | |
454 | while let OptimizedExpr::Choice(lhs, rhs) = current { |
455 | tail.push(generate_expr(*lhs)); |
456 | current = *rhs; |
457 | } |
458 | tail.push(generate_expr(current)); |
459 | |
460 | quote! { |
461 | #head |
462 | #( |
463 | .or_else(|state| { |
464 | #tail |
465 | }) |
466 | )* |
467 | } |
468 | } |
469 | OptimizedExpr::Opt(expr) => { |
470 | let expr = generate_expr(*expr); |
471 | |
472 | quote! { |
473 | state.optional(|state| { |
474 | #expr |
475 | }) |
476 | } |
477 | } |
478 | OptimizedExpr::Rep(expr) => { |
479 | let expr = generate_expr(*expr); |
480 | |
481 | quote! { |
482 | state.sequence(|state| { |
483 | state.optional(|state| { |
484 | #expr.and_then(|state| { |
485 | state.repeat(|state| { |
486 | state.sequence(|state| { |
487 | super::hidden::skip( |
488 | state |
489 | ).and_then(|state| { |
490 | #expr |
491 | }) |
492 | }) |
493 | }) |
494 | }) |
495 | }) |
496 | }) |
497 | } |
498 | } |
499 | OptimizedExpr::Skip(strings) => { |
500 | quote! { |
501 | let strings = [#(#strings),*]; |
502 | |
503 | state.skip_until(&strings) |
504 | } |
505 | } |
506 | OptimizedExpr::Push(expr) => { |
507 | let expr = generate_expr(*expr); |
508 | |
509 | quote! { |
510 | state.stack_push(|state| #expr) |
511 | } |
512 | } |
513 | OptimizedExpr::RestoreOnErr(expr) => { |
514 | let expr = generate_expr(*expr); |
515 | |
516 | quote! { |
517 | state.restore_on_err(|state| #expr) |
518 | } |
519 | } |
520 | #[cfg (feature = "grammar-extras" )] |
521 | OptimizedExpr::NodeTag(expr, tag) => { |
522 | let expr = generate_expr(*expr); |
523 | quote! { |
524 | #expr.and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed(#tag))) |
525 | } |
526 | } |
527 | } |
528 | } |
529 | |
530 | fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream { |
531 | match expr { |
532 | OptimizedExpr::Str(string) => { |
533 | quote! { |
534 | state.match_string(#string) |
535 | } |
536 | } |
537 | OptimizedExpr::Insens(string) => { |
538 | quote! { |
539 | state.match_insensitive(#string) |
540 | } |
541 | } |
542 | OptimizedExpr::Range(start, end) => { |
543 | let start = start.chars().next().unwrap(); |
544 | let end = end.chars().next().unwrap(); |
545 | |
546 | quote! { |
547 | state.match_range(#start..#end) |
548 | } |
549 | } |
550 | OptimizedExpr::Ident(ident) => { |
551 | let ident = format_ident!("r# {}" , ident); |
552 | quote! { self::#ident(state) } |
553 | } |
554 | OptimizedExpr::PeekSlice(start, end_) => { |
555 | let end = QuoteOption(end_); |
556 | quote! { |
557 | state.stack_match_peek_slice(#start, #end, ::pest::MatchDir::BottomToTop) |
558 | } |
559 | } |
560 | OptimizedExpr::PosPred(expr) => { |
561 | let expr = generate_expr_atomic(*expr); |
562 | |
563 | quote! { |
564 | state.lookahead(true, |state| { |
565 | #expr |
566 | }) |
567 | } |
568 | } |
569 | OptimizedExpr::NegPred(expr) => { |
570 | let expr = generate_expr_atomic(*expr); |
571 | |
572 | quote! { |
573 | state.lookahead(false, |state| { |
574 | #expr |
575 | }) |
576 | } |
577 | } |
578 | OptimizedExpr::Seq(lhs, rhs) => { |
579 | let head = generate_expr_atomic(*lhs); |
580 | let mut tail = vec![]; |
581 | let mut current = *rhs; |
582 | |
583 | while let OptimizedExpr::Seq(lhs, rhs) = current { |
584 | tail.push(generate_expr_atomic(*lhs)); |
585 | current = *rhs; |
586 | } |
587 | tail.push(generate_expr_atomic(current)); |
588 | |
589 | quote! { |
590 | state.sequence(|state| { |
591 | #head |
592 | #( |
593 | .and_then(|state| { |
594 | #tail |
595 | }) |
596 | )* |
597 | }) |
598 | } |
599 | } |
600 | OptimizedExpr::Choice(lhs, rhs) => { |
601 | let head = generate_expr_atomic(*lhs); |
602 | let mut tail = vec![]; |
603 | let mut current = *rhs; |
604 | |
605 | while let OptimizedExpr::Choice(lhs, rhs) = current { |
606 | tail.push(generate_expr_atomic(*lhs)); |
607 | current = *rhs; |
608 | } |
609 | tail.push(generate_expr_atomic(current)); |
610 | |
611 | quote! { |
612 | #head |
613 | #( |
614 | .or_else(|state| { |
615 | #tail |
616 | }) |
617 | )* |
618 | } |
619 | } |
620 | OptimizedExpr::Opt(expr) => { |
621 | let expr = generate_expr_atomic(*expr); |
622 | |
623 | quote! { |
624 | state.optional(|state| { |
625 | #expr |
626 | }) |
627 | } |
628 | } |
629 | OptimizedExpr::Rep(expr) => { |
630 | let expr = generate_expr_atomic(*expr); |
631 | |
632 | quote! { |
633 | state.repeat(|state| { |
634 | #expr |
635 | }) |
636 | } |
637 | } |
638 | OptimizedExpr::Skip(strings) => { |
639 | quote! { |
640 | let strings = [#(#strings),*]; |
641 | |
642 | state.skip_until(&strings) |
643 | } |
644 | } |
645 | OptimizedExpr::Push(expr) => { |
646 | let expr = generate_expr_atomic(*expr); |
647 | |
648 | quote! { |
649 | state.stack_push(|state| #expr) |
650 | } |
651 | } |
652 | OptimizedExpr::RestoreOnErr(expr) => { |
653 | let expr = generate_expr_atomic(*expr); |
654 | |
655 | quote! { |
656 | state.restore_on_err(|state| #expr) |
657 | } |
658 | } |
659 | #[cfg (feature = "grammar-extras" )] |
660 | OptimizedExpr::NodeTag(expr, tag) => { |
661 | let expr = generate_expr_atomic(*expr); |
662 | quote! { |
663 | #expr.and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed(#tag))) |
664 | } |
665 | } |
666 | } |
667 | } |
668 | |
669 | struct QuoteOption<T>(Option<T>); |
670 | |
671 | impl<T: ToTokens> ToTokens for QuoteOption<T> { |
672 | fn to_tokens(&self, tokens: &mut TokenStream) { |
673 | let option: TokenStream = option_type(); |
674 | tokens.append_all(iter:match self.0 { |
675 | Some(ref t: &T) => quote! { #option::Some(#t) }, |
676 | None => quote! { #option::None }, |
677 | }); |
678 | } |
679 | } |
680 | |
681 | fn box_type() -> TokenStream { |
682 | #[cfg (feature = "std" )] |
683 | quote! { ::std::boxed::Box } |
684 | |
685 | #[cfg (not(feature = "std" ))] |
686 | quote! { ::alloc::boxed::Box } |
687 | } |
688 | |
689 | fn result_type() -> TokenStream { |
690 | #[cfg (feature = "std" )] |
691 | quote! { ::std::result::Result } |
692 | |
693 | #[cfg (not(feature = "std" ))] |
694 | quote! { ::core::result::Result } |
695 | } |
696 | |
697 | fn option_type() -> TokenStream { |
698 | #[cfg (feature = "std" )] |
699 | quote! { ::std::option::Option } |
700 | |
701 | #[cfg (not(feature = "std" ))] |
702 | quote! { ::core::option::Option } |
703 | } |
704 | |
#[cfg(test)]
mod tests {
    use super::*;

    use proc_macro2::Span;
    use std::collections::HashMap;

    /// Boxed string-literal expression matching `text`.
    fn lit(text: &str) -> Box<OptimizedExpr> {
        Box::new(OptimizedExpr::Str(text.to_owned()))
    }

    /// Right-nested sequence `"a" ~ ("b" ~ ("c" ~ "d"))`.
    fn abcd_sequence() -> OptimizedExpr {
        let c_d = OptimizedExpr::Seq(lit("c"), lit("d"));
        let b_c_d = OptimizedExpr::Seq(lit("b"), Box::new(c_d));
        OptimizedExpr::Seq(lit("a"), Box::new(b_c_d))
    }

    /// Right-nested choice `"a" | ("b" | ("c" | "d"))`.
    fn abcd_choice() -> OptimizedExpr {
        let c_d = OptimizedExpr::Choice(lit("c"), lit("d"));
        let b_c_d = OptimizedExpr::Choice(lit("b"), Box::new(c_d));
        OptimizedExpr::Choice(lit("a"), Box::new(b_c_d))
    }

    /// Expression shared by the `expr_complex*` tests:
    /// `a | ('a'..'b' ~ !(^"b")* ~ &(("c" | "d")*)?)`.
    fn complex_expr() -> OptimizedExpr {
        let c_or_d = OptimizedExpr::Choice(lit("c"), lit("d"));
        let positive = OptimizedExpr::PosPred(Box::new(OptimizedExpr::Opt(Box::new(
            OptimizedExpr::Rep(Box::new(c_or_d)),
        ))));
        let negative = OptimizedExpr::NegPred(Box::new(OptimizedExpr::Rep(Box::new(
            OptimizedExpr::Insens("b".to_owned()),
        ))));
        let range = OptimizedExpr::Range("a".to_owned(), "b".to_owned());

        OptimizedExpr::Choice(
            Box::new(OptimizedExpr::Ident("a".to_owned())),
            Box::new(OptimizedExpr::Seq(
                Box::new(range),
                Box::new(OptimizedExpr::Seq(Box::new(negative), Box::new(positive))),
            )),
        )
    }

    #[test]
    fn rule_enum_simple() {
        let rules = vec![OptimizedRule {
            name: "f".to_owned(),
            ty: RuleType::Normal,
            expr: OptimizedExpr::Ident("g".to_owned()),
        }];

        let line_docs: HashMap<String, String> =
            std::iter::once(("f".to_owned(), "This is rule comment".to_owned())).collect();

        let doc_comment = &DocComment {
            grammar_doc: "Rule doc \nhello".to_owned(),
            line_docs,
        };

        let expected = quote! {
            #[doc = "Rule doc \nhello"]
            #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
            #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
            pub enum Rule {
                #[doc = "This is rule comment"]
                r#f
            }
        };

        assert_eq!(
            generate_enum(&rules, doc_comment, false).to_string(),
            expected.to_string()
        );
    }

    #[test]
    fn sequence() {
        let expected = quote! {
            state.sequence(|state| {
                state.match_string("a").and_then(|state| {
                    super::hidden::skip(state)
                }).and_then(|state| {
                    state.match_string("b")
                }).and_then(|state| {
                    super::hidden::skip(state)
                }).and_then(|state| {
                    state.match_string("c")
                }).and_then(|state| {
                    super::hidden::skip(state)
                }).and_then(|state| {
                    state.match_string("d")
                })
            })
        };

        assert_eq!(
            generate_expr(abcd_sequence()).to_string(),
            expected.to_string()
        );
    }

    #[test]
    fn sequence_atomic() {
        let expected = quote! {
            state.sequence(|state| {
                state.match_string("a").and_then(|state| {
                    state.match_string("b")
                }).and_then(|state| {
                    state.match_string("c")
                }).and_then(|state| {
                    state.match_string("d")
                })
            })
        };

        assert_eq!(
            generate_expr_atomic(abcd_sequence()).to_string(),
            expected.to_string()
        );
    }

    #[test]
    fn choice() {
        let expected = quote! {
            state.match_string("a").or_else(|state| {
                state.match_string("b")
            }).or_else(|state| {
                state.match_string("c")
            }).or_else(|state| {
                state.match_string("d")
            })
        };

        assert_eq!(
            generate_expr(abcd_choice()).to_string(),
            expected.to_string()
        );
    }

    #[test]
    fn choice_atomic() {
        let expected = quote! {
            state.match_string("a").or_else(|state| {
                state.match_string("b")
            }).or_else(|state| {
                state.match_string("c")
            }).or_else(|state| {
                state.match_string("d")
            })
        };

        assert_eq!(
            generate_expr_atomic(abcd_choice()).to_string(),
            expected.to_string()
        );
    }

    #[test]
    fn skip() {
        let expr = OptimizedExpr::Skip(vec!["a".to_owned(), "b".to_owned()]);

        let expected = quote! {
            let strings = ["a", "b"];

            state.skip_until(&strings)
        };

        assert_eq!(
            generate_expr_atomic(expr).to_string(),
            expected.to_string()
        );
    }

    #[test]
    fn expr_complex() {
        // Body of the repetition inside the negative lookahead.
        let skip_then_b = quote! {
            state.sequence(|state| {
                super::hidden::skip(state).and_then(
                    |state| {
                        state.match_insensitive("b")
                    }
                )
            })
        };
        // Trailing iterations of the `("c" | "d")*` repetition.
        let repeat_c_or_d = quote! {
            state.repeat(|state| {
                state.sequence(|state| {
                    super::hidden::skip(state).and_then(|state| {
                        state.match_string("c")
                            .or_else(|state| {
                                state.match_string("d")
                            })
                     })
                })
            })
        };

        let expected = quote! {
            self::r#a(state).or_else(|state| {
                state.sequence(|state| {
                    state.match_range('a'..'b').and_then(|state| {
                        super::hidden::skip(state)
                    }).and_then(|state| {
                        state.lookahead(false, |state| {
                            state.sequence(|state| {
                                state.optional(|state| {
                                    state.match_insensitive(
                                        "b"
                                    ).and_then(|state| {
                                        state.repeat(|state| {
                                            #skip_then_b
                                        })
                                    })
                                })
                            })
                        })
                    }).and_then(|state| {
                        super::hidden::skip(state)
                    }).and_then(|state| {
                        state.lookahead(true, |state| {
                            state.optional(|state| {
                                state.sequence(|state| {
                                    state.optional(|state| {
                                        state.match_string("c")
                                        .or_else(|state| {
                                            state.match_string("d")
                                        }).and_then(|state| {
                                            #repeat_c_or_d
                                        })
                                    })
                                })
                            })
                        })
                    })
                })
            })
        };

        assert_eq!(
            generate_expr(complex_expr()).to_string(),
            expected.to_string()
        );
    }

    #[test]
    fn expr_complex_atomic() {
        let expected = quote! {
            self::r#a(state).or_else(|state| {
                state.sequence(|state| {
                    state.match_range('a'..'b').and_then(|state| {
                        state.lookahead(false, |state| {
                            state.repeat(|state| {
                                state.match_insensitive("b")
                            })
                        })
                    }).and_then(|state| {
                        state.lookahead(true, |state| {
                            state.optional(|state| {
                                state.repeat(|state| {
                                    state.match_string("c")
                                    .or_else(|state| {
                                        state.match_string("d")
                                    })
                                })
                            })
                        })
                    })
                })
            })
        };

        assert_eq!(
            generate_expr_atomic(complex_expr()).to_string(),
            expected.to_string()
        );
    }

    #[test]
    fn test_generate_complete() {
        let name = Ident::new("MyParser", Span::call_site());
        let generics = Generics::default();

        // `if` is a Rust keyword: exercises the raw-identifier output.
        let rules = vec![
            OptimizedRule {
                name: "a".to_owned(),
                ty: RuleType::Silent,
                expr: OptimizedExpr::Str("b".to_owned()),
            },
            OptimizedRule {
                name: "if".to_owned(),
                ty: RuleType::Silent,
                expr: OptimizedExpr::Ident("a".to_owned()),
            },
        ];

        let line_docs: HashMap<String, String> =
            std::iter::once(("if".to_owned(), "If statement".to_owned())).collect();

        let doc_comment = &DocComment {
            line_docs,
            grammar_doc: "This is Rule doc \nThis is second line".to_owned(),
        };

        let defaults = vec!["ANY"];
        let result = result_type();
        let box_ty = box_type();
        let current_dir = std::env::current_dir().expect("Unable to get current directory");

        let base_path = current_dir.join("base.pest").to_str().unwrap().to_string();
        let test_path = current_dir.join("test.pest").to_str().unwrap().to_string();

        let grammar_paths = vec![PathBuf::from("base.pest"), PathBuf::from("test.pest")];
        let generated = generate(
            name,
            &generics,
            grammar_paths,
            rules,
            defaults,
            doc_comment,
            true,
        );

        let expected = quote! {
            #[allow(non_upper_case_globals)]
            const _PEST_GRAMMAR_MyParser: [&'static str; 2usize] = [include_str!(#base_path), include_str!(#test_path)];

            #[doc = "This is Rule doc \nThis is second line"]
            #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
            #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
            pub enum Rule {
                r#a,
                #[doc = "If statement"]
                r#if
            }

            #[allow(clippy::all)]
            impl ::pest::Parser<Rule> for MyParser {
                fn parse<'i>(
                    rule: Rule,
                    input: &'i str
                ) -> #result<
                    ::pest::iterators::Pairs<'i, Rule>,
                    ::pest::error::Error<Rule>
                > {
                    mod rules {
                        #![allow(clippy::upper_case_acronyms)]
                        pub mod hidden {
                            use super::super::Rule;

                            #[inline]
                            #[allow(dead_code, non_snake_case, unused_variables)]
                            pub fn skip(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
                                Ok(state)
                            }
                        }

                        pub mod visible {
                            use super::super::Rule;

                            #[inline]
                            #[allow(non_snake_case, unused_variables)]
                            pub fn r#a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
                                state.match_string("b")
                            }

                            #[inline]
                            #[allow(non_snake_case, unused_variables)]
                            pub fn r#if(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
                                self::r#a(state)
                            }

                            #[inline]
                            #[allow(dead_code, non_snake_case, unused_variables)]
                            pub fn ANY(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
                                state.skip(1)
                            }
                        }

                        pub use self::visible::*;
                    }

                    ::pest::state(input, |state| {
                        match rule {
                            Rule::r#a => rules::r#a(state),
                            Rule::r#if => rules::r#if(state)
                        }
                    })
                }
            }
        };

        assert_eq!(generated.to_string(), expected.to_string());
    }
}
1124 | |