1 | // pest. The Elegant Parser |
// Copyright (c) 2018 Dragoș Tiselice
3 | // |
4 | // Licensed under the Apache License, Version 2.0 |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. All files in the project carrying such notice may not be copied, |
8 | // modified, or distributed except according to those terms. |
9 | |
10 | //! Types and helpers for the pest's own grammar parser. |
11 | |
12 | use std::char; |
13 | use std::iter::Peekable; |
14 | |
15 | use pest::error::{Error, ErrorVariant}; |
16 | use pest::iterators::{Pair, Pairs}; |
17 | use pest::pratt_parser::{Assoc, Op, PrattParser}; |
18 | use pest::{Parser, Position, Span}; |
19 | |
20 | use crate::ast::{Expr, Rule as AstRule, RuleType}; |
21 | use crate::validator; |
22 | |
// Private module whose body is pulled in with `include!`: the `grammar.rs`
// next to this file by default, or `__pest_grammar.rs` from `OUT_DIR` when the
// `not-bootstrap-in-src` feature is enabled.
// NOTE(review): presumably this is where `Rule` and `PestParser` come from, as
// the rest of this file uses them without defining them — confirm.
#[allow(missing_docs, unused_qualifications)]
mod grammar {
    // Default: use the file that lives in the source tree.
    #[cfg(not(feature = "not-bootstrap-in-src"))]
    include!("grammar.rs");

    // With `not-bootstrap-in-src`: use the file located under `OUT_DIR`.
    #[cfg(feature = "not-bootstrap-in-src")]
    include!(concat!(env!("OUT_DIR"), "/__pest_grammar.rs"));
}
31 | |
32 | pub use self::grammar::*; |
33 | |
34 | /// A helper that will parse using the pest grammar |
35 | #[allow (clippy::perf)] |
36 | pub fn parse(rule: Rule, data: &str) -> Result<Pairs<'_, Rule>, Error<Rule>> { |
37 | PestParser::parse(rule, input:data) |
38 | } |
39 | |
/// The pest grammar rule
///
/// A single rule as read from a grammar, still carrying the source spans that
/// are dropped when it is converted into an AST rule.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParserRule<'i> {
    /// The rule's name
    pub name: String,
    /// The rule's span (the span of the rule's name in the grammar source)
    pub span: Span<'i>,
    /// The rule's type (normal, silent, atomic, compound-atomic or non-atomic)
    pub ty: RuleType,
    /// The rule's parser node (the root of the rule's expression tree)
    pub node: ParserNode<'i>,
}
52 | |
/// The pest grammar node
///
/// One node of a rule's expression tree: an expression together with the part
/// of the grammar source it was built from.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParserNode<'i> {
    /// The node's expression
    pub expr: ParserExpr<'i>,
    /// The node's span
    pub span: Span<'i>,
}
61 | |
62 | impl<'i> ParserNode<'i> { |
63 | /// will remove nodes that do not match `f` |
64 | pub fn filter_map_top_down<F, T>(self, mut f: F) -> Vec<T> |
65 | where |
66 | F: FnMut(ParserNode<'i>) -> Option<T>, |
67 | { |
68 | pub fn filter_internal<'i, F, T>(node: ParserNode<'i>, f: &mut F, result: &mut Vec<T>) |
69 | where |
70 | F: FnMut(ParserNode<'i>) -> Option<T>, |
71 | { |
72 | if let Some(value) = f(node.clone()) { |
73 | result.push(value); |
74 | } |
75 | |
76 | match node.expr { |
77 | ParserExpr::PosPred(node) => { |
78 | filter_internal(*node, f, result); |
79 | } |
80 | ParserExpr::NegPred(node) => { |
81 | filter_internal(*node, f, result); |
82 | } |
83 | ParserExpr::Seq(lhs, rhs) => { |
84 | filter_internal(*lhs, f, result); |
85 | filter_internal(*rhs, f, result); |
86 | } |
87 | ParserExpr::Choice(lhs, rhs) => { |
88 | filter_internal(*lhs, f, result); |
89 | filter_internal(*rhs, f, result); |
90 | } |
91 | ParserExpr::Rep(node) => { |
92 | filter_internal(*node, f, result); |
93 | } |
94 | ParserExpr::RepOnce(node) => { |
95 | filter_internal(*node, f, result); |
96 | } |
97 | ParserExpr::RepExact(node, _) => { |
98 | filter_internal(*node, f, result); |
99 | } |
100 | ParserExpr::RepMin(node, _) => { |
101 | filter_internal(*node, f, result); |
102 | } |
103 | ParserExpr::RepMax(node, _) => { |
104 | filter_internal(*node, f, result); |
105 | } |
106 | ParserExpr::RepMinMax(node, ..) => { |
107 | filter_internal(*node, f, result); |
108 | } |
109 | ParserExpr::Opt(node) => { |
110 | filter_internal(*node, f, result); |
111 | } |
112 | ParserExpr::Push(node) => { |
113 | filter_internal(*node, f, result); |
114 | } |
115 | _ => (), |
116 | } |
117 | } |
118 | |
119 | let mut result = vec![]; |
120 | |
121 | filter_internal(self, &mut f, &mut result); |
122 | |
123 | result |
124 | } |
125 | } |
126 | |
/// All possible parser expressions
///
/// Leaf variants carry owned strings or numbers; every composite variant owns
/// its children as boxed [`ParserNode`]s, so each sub-expression keeps its own
/// span.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParserExpr<'i> {
    /// Matches an exact string, e.g. `"a"`
    Str(String),
    /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
    Insens(String),
    /// Matches one character in the range, e.g. `'a'..'z'`
    Range(String, String),
    /// Matches the rule with the given name, e.g. `a`
    Ident(String),
    /// Matches a custom part of the stack, e.g. `PEEK[..]`
    ///
    /// The first field is the start of the slice and the second its optional end.
    PeekSlice(i32, Option<i32>),
    /// Positive lookahead; matches expression without making progress, e.g. `&e`
    PosPred(Box<ParserNode<'i>>),
    /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
    NegPred(Box<ParserNode<'i>>),
    /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
    Seq(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
    /// Matches either of two expressions, e.g. `e1 | e2`
    Choice(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
    /// Optionally matches an expression, e.g. `e?`
    Opt(Box<ParserNode<'i>>),
    /// Matches an expression zero or more times, e.g. `e*`
    Rep(Box<ParserNode<'i>>),
    /// Matches an expression one or more times, e.g. `e+`
    RepOnce(Box<ParserNode<'i>>),
    /// Matches an expression an exact number of times, e.g. `e{n}`
    RepExact(Box<ParserNode<'i>>, u32),
    /// Matches an expression at least a number of times, e.g. `e{n,}`
    RepMin(Box<ParserNode<'i>>, u32),
    /// Matches an expression at most a number of times, e.g. `e{,n}`
    RepMax(Box<ParserNode<'i>>, u32),
    /// Matches an expression a number of times within a range, e.g. `e{m, n}`
    RepMinMax(Box<ParserNode<'i>>, u32, u32),
    /// Matches an expression and pushes it to the stack, e.g. `push(e)`
    Push(Box<ParserNode<'i>>),
    /// Matches an expression and assigns a label to it, e.g. `#label = exp`
    ///
    /// Only available with the `grammar-extras` feature.
    #[cfg(feature = "grammar-extras")]
    NodeTag(Box<ParserNode<'i>>, String),
}
168 | |
169 | fn convert_rule(rule: ParserRule<'_>) -> AstRule { |
170 | let ParserRule { name: String, ty: RuleType, node: ParserNode<'_>, .. } = rule; |
171 | let expr: Expr = convert_node(node); |
172 | AstRule { name, ty, expr } |
173 | } |
174 | |
175 | fn convert_node(node: ParserNode<'_>) -> Expr { |
176 | match node.expr { |
177 | ParserExpr::Str(string) => Expr::Str(string), |
178 | ParserExpr::Insens(string) => Expr::Insens(string), |
179 | ParserExpr::Range(start, end) => Expr::Range(start, end), |
180 | ParserExpr::Ident(ident) => Expr::Ident(ident), |
181 | ParserExpr::PeekSlice(start, end) => Expr::PeekSlice(start, end), |
182 | ParserExpr::PosPred(node) => Expr::PosPred(Box::new(convert_node(*node))), |
183 | ParserExpr::NegPred(node) => Expr::NegPred(Box::new(convert_node(*node))), |
184 | ParserExpr::Seq(node1, node2) => Expr::Seq( |
185 | Box::new(convert_node(*node1)), |
186 | Box::new(convert_node(*node2)), |
187 | ), |
188 | ParserExpr::Choice(node1, node2) => Expr::Choice( |
189 | Box::new(convert_node(*node1)), |
190 | Box::new(convert_node(*node2)), |
191 | ), |
192 | ParserExpr::Opt(node) => Expr::Opt(Box::new(convert_node(*node))), |
193 | ParserExpr::Rep(node) => Expr::Rep(Box::new(convert_node(*node))), |
194 | ParserExpr::RepOnce(node) => Expr::RepOnce(Box::new(convert_node(*node))), |
195 | ParserExpr::RepExact(node, num) => Expr::RepExact(Box::new(convert_node(*node)), num), |
196 | ParserExpr::RepMin(node, max) => Expr::RepMin(Box::new(convert_node(*node)), max), |
197 | ParserExpr::RepMax(node, max) => Expr::RepMax(Box::new(convert_node(*node)), max), |
198 | ParserExpr::RepMinMax(node, min, max) => { |
199 | Expr::RepMinMax(Box::new(convert_node(*node)), min, max) |
200 | } |
201 | ParserExpr::Push(node) => Expr::Push(Box::new(convert_node(*node))), |
202 | #[cfg (feature = "grammar-extras" )] |
203 | ParserExpr::NodeTag(node, tag) => Expr::NodeTag(Box::new(convert_node(*node)), tag), |
204 | } |
205 | } |
206 | |
207 | /// Converts a parser's result (`Pairs`) to an AST |
208 | pub fn consume_rules(pairs: Pairs<'_, Rule>) -> Result<Vec<AstRule>, Vec<Error<Rule>>> { |
209 | let rules: Vec> = consume_rules_with_spans(pairs)?; |
210 | let errors: Vec> = validator::validate_ast(&rules); |
211 | if errors.is_empty() { |
212 | Ok(rules.into_iter().map(convert_rule).collect()) |
213 | } else { |
214 | Err(errors) |
215 | } |
216 | } |
217 | |
218 | /// A helper function to rename verbose rules |
219 | /// for the sake of better error messages |
220 | #[inline ] |
221 | pub fn rename_meta_rule(rule: &Rule) -> String { |
222 | match *rule { |
223 | Rule::grammar_rule => "rule" .to_owned(), |
224 | Rule::_push => "PUSH" .to_owned(), |
225 | Rule::assignment_operator => "`=`" .to_owned(), |
226 | Rule::silent_modifier => "`_`" .to_owned(), |
227 | Rule::atomic_modifier => "`@`" .to_owned(), |
228 | Rule::compound_atomic_modifier => "`$`" .to_owned(), |
229 | Rule::non_atomic_modifier => "`!`" .to_owned(), |
230 | Rule::opening_brace => "`{`" .to_owned(), |
231 | Rule::closing_brace => "`}`" .to_owned(), |
232 | Rule::opening_brack => "`[`" .to_owned(), |
233 | Rule::closing_brack => "`]`" .to_owned(), |
234 | Rule::opening_paren => "`(`" .to_owned(), |
235 | Rule::positive_predicate_operator => "`&`" .to_owned(), |
236 | Rule::negative_predicate_operator => "`!`" .to_owned(), |
237 | Rule::sequence_operator => "`&`" .to_owned(), |
238 | Rule::choice_operator => "`|`" .to_owned(), |
239 | Rule::optional_operator => "`?`" .to_owned(), |
240 | Rule::repeat_operator => "`*`" .to_owned(), |
241 | Rule::repeat_once_operator => "`+`" .to_owned(), |
242 | Rule::comma => "`,`" .to_owned(), |
243 | Rule::closing_paren => "`)`" .to_owned(), |
244 | Rule::quote => "` \"`" .to_owned(), |
245 | Rule::insensitive_string => "`^`" .to_owned(), |
246 | Rule::range_operator => "`..`" .to_owned(), |
247 | Rule::single_quote => "`'`" .to_owned(), |
248 | Rule::grammar_doc => "//!" .to_owned(), |
249 | Rule::line_doc => "///" .to_owned(), |
250 | other_rule => format!(" {:?}" , other_rule), |
251 | } |
252 | } |
253 | |
254 | fn consume_rules_with_spans( |
255 | pairs: Pairs<'_, Rule>, |
256 | ) -> Result<Vec<ParserRule<'_>>, Vec<Error<Rule>>> { |
257 | let pratt = PrattParser::new() |
258 | .op(Op::infix(Rule::choice_operator, Assoc::Left)) |
259 | .op(Op::infix(Rule::sequence_operator, Assoc::Left)); |
260 | |
261 | pairs |
262 | .filter(|pair| pair.as_rule() == Rule::grammar_rule) |
263 | .filter(|pair| { |
264 | // To ignore `grammar_rule > line_doc` pairs |
265 | let mut pairs = pair.clone().into_inner(); |
266 | let pair = pairs.next().unwrap(); |
267 | |
268 | pair.as_rule() != Rule::line_doc |
269 | }) |
270 | .map(|pair| { |
271 | let mut pairs = pair.into_inner().peekable(); |
272 | |
273 | let span = pairs.next().unwrap().as_span(); |
274 | let name = span.as_str().to_owned(); |
275 | |
276 | pairs.next().unwrap(); // assignment_operator |
277 | |
278 | let ty = if pairs.peek().unwrap().as_rule() != Rule::opening_brace { |
279 | match pairs.next().unwrap().as_rule() { |
280 | Rule::silent_modifier => RuleType::Silent, |
281 | Rule::atomic_modifier => RuleType::Atomic, |
282 | Rule::compound_atomic_modifier => RuleType::CompoundAtomic, |
283 | Rule::non_atomic_modifier => RuleType::NonAtomic, |
284 | _ => unreachable!(), |
285 | } |
286 | } else { |
287 | RuleType::Normal |
288 | }; |
289 | |
290 | pairs.next().unwrap(); // opening_brace |
291 | |
292 | // skip initial infix operators |
293 | let mut inner_nodes = pairs.next().unwrap().into_inner().peekable(); |
294 | if inner_nodes.peek().unwrap().as_rule() == Rule::choice_operator { |
295 | inner_nodes.next().unwrap(); |
296 | } |
297 | |
298 | let node = consume_expr(inner_nodes, &pratt)?; |
299 | |
300 | Ok(ParserRule { |
301 | name, |
302 | span, |
303 | ty, |
304 | node, |
305 | }) |
306 | }) |
307 | .collect() |
308 | } |
309 | |
310 | fn get_node_tag<'i>( |
311 | pairs: &mut Peekable<Pairs<'i, Rule>>, |
312 | ) -> (Pair<'i, Rule>, Option<(String, Position<'i>)>) { |
313 | let pair_or_tag: Pair<'_, Rule> = pairs.next().unwrap(); |
314 | if let Some(next_pair: &Pair<'_, Rule>) = pairs.peek() { |
315 | if next_pair.as_rule() == Rule::assignment_operator { |
316 | pairs.next().unwrap(); |
317 | let pair: Pair<'_, Rule> = pairs.next().unwrap(); |
318 | ( |
319 | pair, |
320 | Some(( |
321 | pair_or_tag.as_str()[1..].to_string(), |
322 | pair_or_tag.as_span().start_pos(), |
323 | )), |
324 | ) |
325 | } else { |
326 | (pair_or_tag, None) |
327 | } |
328 | } else { |
329 | (pair_or_tag, None) |
330 | } |
331 | } |
332 | |
333 | fn consume_expr<'i>( |
334 | pairs: Peekable<Pairs<'i, Rule>>, |
335 | pratt: &PrattParser<Rule>, |
336 | ) -> Result<ParserNode<'i>, Vec<Error<Rule>>> { |
337 | fn unaries<'i>( |
338 | mut pairs: Peekable<Pairs<'i, Rule>>, |
339 | pratt: &PrattParser<Rule>, |
340 | ) -> Result<ParserNode<'i>, Vec<Error<Rule>>> { |
341 | #[cfg (feature = "grammar-extras" )] |
342 | let (pair, tag_start) = get_node_tag(&mut pairs); |
343 | #[cfg (not(feature = "grammar-extras" ))] |
344 | let (pair, _tag_start) = get_node_tag(&mut pairs); |
345 | |
346 | let node = match pair.as_rule() { |
347 | Rule::opening_paren => { |
348 | let node = unaries(pairs, pratt)?; |
349 | let end = node.span.end_pos(); |
350 | |
351 | ParserNode { |
352 | expr: node.expr, |
353 | span: pair.as_span().start_pos().span(&end), |
354 | } |
355 | } |
356 | Rule::positive_predicate_operator => { |
357 | let node = unaries(pairs, pratt)?; |
358 | let end = node.span.end_pos(); |
359 | |
360 | ParserNode { |
361 | expr: ParserExpr::PosPred(Box::new(node)), |
362 | span: pair.as_span().start_pos().span(&end), |
363 | } |
364 | } |
365 | Rule::negative_predicate_operator => { |
366 | let node = unaries(pairs, pratt)?; |
367 | let end = node.span.end_pos(); |
368 | |
369 | ParserNode { |
370 | expr: ParserExpr::NegPred(Box::new(node)), |
371 | span: pair.as_span().start_pos().span(&end), |
372 | } |
373 | } |
374 | other_rule => { |
375 | let node = match other_rule { |
376 | Rule::expression => consume_expr(pair.into_inner().peekable(), pratt)?, |
377 | Rule::_push => { |
378 | let start = pair.clone().as_span().start_pos(); |
379 | let mut pairs = pair.into_inner(); |
380 | pairs.next().unwrap(); // opening_paren |
381 | let pair = pairs.next().unwrap(); |
382 | |
383 | let node = consume_expr(pair.into_inner().peekable(), pratt)?; |
384 | let end = node.span.end_pos(); |
385 | |
386 | ParserNode { |
387 | expr: ParserExpr::Push(Box::new(node)), |
388 | span: start.span(&end), |
389 | } |
390 | } |
391 | Rule::peek_slice => { |
392 | let mut pairs = pair.clone().into_inner(); |
393 | pairs.next().unwrap(); // opening_brack |
394 | let pair_start = pairs.next().unwrap(); // .. or integer |
395 | let start: i32 = match pair_start.as_rule() { |
396 | Rule::range_operator => 0, |
397 | Rule::integer => { |
398 | pairs.next().unwrap(); // .. |
399 | pair_start.as_str().parse().unwrap() |
400 | } |
401 | _ => unreachable!("peek start" ), |
402 | }; |
403 | let pair_end = pairs.next().unwrap(); // integer or } |
404 | let end: Option<i32> = match pair_end.as_rule() { |
405 | Rule::closing_brack => None, |
406 | Rule::integer => { |
407 | pairs.next().unwrap(); // } |
408 | Some(pair_end.as_str().parse().unwrap()) |
409 | } |
410 | _ => unreachable!("peek end" ), |
411 | }; |
412 | ParserNode { |
413 | expr: ParserExpr::PeekSlice(start, end), |
414 | span: pair.as_span(), |
415 | } |
416 | } |
417 | Rule::identifier => ParserNode { |
418 | expr: ParserExpr::Ident(pair.as_str().to_owned()), |
419 | span: pair.clone().as_span(), |
420 | }, |
421 | Rule::string => { |
422 | let string = unescape(pair.as_str()).expect("incorrect string literal" ); |
423 | ParserNode { |
424 | expr: ParserExpr::Str(string[1..string.len() - 1].to_owned()), |
425 | span: pair.clone().as_span(), |
426 | } |
427 | } |
428 | Rule::insensitive_string => { |
429 | let string = unescape(pair.as_str()).expect("incorrect string literal" ); |
430 | ParserNode { |
431 | expr: ParserExpr::Insens(string[2..string.len() - 1].to_owned()), |
432 | span: pair.clone().as_span(), |
433 | } |
434 | } |
435 | Rule::range => { |
436 | let mut pairs = pair.into_inner(); |
437 | let pair = pairs.next().unwrap(); |
438 | let start = unescape(pair.as_str()).expect("incorrect char literal" ); |
439 | let start_pos = pair.clone().as_span().start_pos(); |
440 | pairs.next(); |
441 | let pair = pairs.next().unwrap(); |
442 | let end = unescape(pair.as_str()).expect("incorrect char literal" ); |
443 | let end_pos = pair.clone().as_span().end_pos(); |
444 | |
445 | ParserNode { |
446 | expr: ParserExpr::Range( |
447 | start[1..start.len() - 1].to_owned(), |
448 | end[1..end.len() - 1].to_owned(), |
449 | ), |
450 | span: start_pos.span(&end_pos), |
451 | } |
452 | } |
453 | x => unreachable!("other rule: {:?}" , x), |
454 | }; |
455 | |
456 | pairs.fold( |
457 | Ok(node), |
458 | |node: Result<ParserNode<'i>, Vec<Error<Rule>>>, pair| { |
459 | let node = node?; |
460 | |
461 | let node = match pair.as_rule() { |
462 | Rule::optional_operator => { |
463 | let start = node.span.start_pos(); |
464 | ParserNode { |
465 | expr: ParserExpr::Opt(Box::new(node)), |
466 | span: start.span(&pair.as_span().end_pos()), |
467 | } |
468 | } |
469 | Rule::repeat_operator => { |
470 | let start = node.span.start_pos(); |
471 | ParserNode { |
472 | expr: ParserExpr::Rep(Box::new(node)), |
473 | span: start.span(&pair.as_span().end_pos()), |
474 | } |
475 | } |
476 | Rule::repeat_once_operator => { |
477 | let start = node.span.start_pos(); |
478 | ParserNode { |
479 | expr: ParserExpr::RepOnce(Box::new(node)), |
480 | span: start.span(&pair.as_span().end_pos()), |
481 | } |
482 | } |
483 | Rule::repeat_exact => { |
484 | let mut inner = pair.clone().into_inner(); |
485 | |
486 | inner.next().unwrap(); // opening_brace |
487 | |
488 | let number = inner.next().unwrap(); |
489 | let num = if let Ok(num) = number.as_str().parse::<u32>() { |
490 | num |
491 | } else { |
492 | return Err(vec![Error::new_from_span( |
493 | ErrorVariant::CustomError { |
494 | message: "number cannot overflow u32" .to_owned(), |
495 | }, |
496 | number.as_span(), |
497 | )]); |
498 | }; |
499 | |
500 | if num == 0 { |
501 | let error: Error<Rule> = Error::new_from_span( |
502 | ErrorVariant::CustomError { |
503 | message: "cannot repeat 0 times" .to_owned(), |
504 | }, |
505 | number.as_span(), |
506 | ); |
507 | |
508 | return Err(vec![error]); |
509 | } |
510 | |
511 | let start = node.span.start_pos(); |
512 | ParserNode { |
513 | expr: ParserExpr::RepExact(Box::new(node), num), |
514 | span: start.span(&pair.as_span().end_pos()), |
515 | } |
516 | } |
517 | Rule::repeat_min => { |
518 | let mut inner = pair.clone().into_inner(); |
519 | |
520 | inner.next().unwrap(); // opening_brace |
521 | |
522 | let min_number = inner.next().unwrap(); |
523 | let min = if let Ok(min) = min_number.as_str().parse::<u32>() { |
524 | min |
525 | } else { |
526 | return Err(vec![Error::new_from_span( |
527 | ErrorVariant::CustomError { |
528 | message: "number cannot overflow u32" .to_owned(), |
529 | }, |
530 | min_number.as_span(), |
531 | )]); |
532 | }; |
533 | |
534 | let start = node.span.start_pos(); |
535 | ParserNode { |
536 | expr: ParserExpr::RepMin(Box::new(node), min), |
537 | span: start.span(&pair.as_span().end_pos()), |
538 | } |
539 | } |
540 | Rule::repeat_max => { |
541 | let mut inner = pair.clone().into_inner(); |
542 | |
543 | inner.next().unwrap(); // opening_brace |
544 | inner.next().unwrap(); // comma |
545 | |
546 | let max_number = inner.next().unwrap(); |
547 | let max = if let Ok(max) = max_number.as_str().parse::<u32>() { |
548 | max |
549 | } else { |
550 | return Err(vec![Error::new_from_span( |
551 | ErrorVariant::CustomError { |
552 | message: "number cannot overflow u32" .to_owned(), |
553 | }, |
554 | max_number.as_span(), |
555 | )]); |
556 | }; |
557 | |
558 | if max == 0 { |
559 | let error: Error<Rule> = Error::new_from_span( |
560 | ErrorVariant::CustomError { |
561 | message: "cannot repeat 0 times" .to_owned(), |
562 | }, |
563 | max_number.as_span(), |
564 | ); |
565 | |
566 | return Err(vec![error]); |
567 | } |
568 | |
569 | let start = node.span.start_pos(); |
570 | ParserNode { |
571 | expr: ParserExpr::RepMax(Box::new(node), max), |
572 | span: start.span(&pair.as_span().end_pos()), |
573 | } |
574 | } |
575 | Rule::repeat_min_max => { |
576 | let mut inner = pair.clone().into_inner(); |
577 | |
578 | inner.next().unwrap(); // opening_brace |
579 | |
580 | let min_number = inner.next().unwrap(); |
581 | let min = if let Ok(min) = min_number.as_str().parse::<u32>() { |
582 | min |
583 | } else { |
584 | return Err(vec![Error::new_from_span( |
585 | ErrorVariant::CustomError { |
586 | message: "number cannot overflow u32" .to_owned(), |
587 | }, |
588 | min_number.as_span(), |
589 | )]); |
590 | }; |
591 | |
592 | inner.next().unwrap(); // comma |
593 | |
594 | let max_number = inner.next().unwrap(); |
595 | let max = if let Ok(max) = max_number.as_str().parse::<u32>() { |
596 | max |
597 | } else { |
598 | return Err(vec![Error::new_from_span( |
599 | ErrorVariant::CustomError { |
600 | message: "number cannot overflow u32" .to_owned(), |
601 | }, |
602 | max_number.as_span(), |
603 | )]); |
604 | }; |
605 | |
606 | if max == 0 { |
607 | let error: Error<Rule> = Error::new_from_span( |
608 | ErrorVariant::CustomError { |
609 | message: "cannot repeat 0 times" .to_owned(), |
610 | }, |
611 | max_number.as_span(), |
612 | ); |
613 | |
614 | return Err(vec![error]); |
615 | } |
616 | |
617 | let start = node.span.start_pos(); |
618 | ParserNode { |
619 | expr: ParserExpr::RepMinMax(Box::new(node), min, max), |
620 | span: start.span(&pair.as_span().end_pos()), |
621 | } |
622 | } |
623 | Rule::closing_paren => { |
624 | let start = node.span.start_pos(); |
625 | |
626 | ParserNode { |
627 | expr: node.expr, |
628 | span: start.span(&pair.as_span().end_pos()), |
629 | } |
630 | } |
631 | rule => unreachable!("node: {:?}" , rule), |
632 | }; |
633 | |
634 | Ok(node) |
635 | }, |
636 | )? |
637 | } |
638 | }; |
639 | #[cfg (feature = "grammar-extras" )] |
640 | if let Some((tag, start)) = tag_start { |
641 | let span = start.span(&node.span.end_pos()); |
642 | Ok(ParserNode { |
643 | expr: ParserExpr::NodeTag(Box::new(node), tag), |
644 | span, |
645 | }) |
646 | } else { |
647 | Ok(node) |
648 | } |
649 | #[cfg (not(feature = "grammar-extras" ))] |
650 | Ok(node) |
651 | } |
652 | |
653 | let term = |pair: Pair<'i, Rule>| unaries(pair.into_inner().peekable(), pratt); |
654 | let infix = |lhs: Result<ParserNode<'i>, Vec<Error<Rule>>>, |
655 | op: Pair<'i, Rule>, |
656 | rhs: Result<ParserNode<'i>, Vec<Error<Rule>>>| match op.as_rule() { |
657 | Rule::sequence_operator => { |
658 | let lhs = lhs?; |
659 | let rhs = rhs?; |
660 | |
661 | let start = lhs.span.start_pos(); |
662 | let end = rhs.span.end_pos(); |
663 | |
664 | Ok(ParserNode { |
665 | expr: ParserExpr::Seq(Box::new(lhs), Box::new(rhs)), |
666 | span: start.span(&end), |
667 | }) |
668 | } |
669 | Rule::choice_operator => { |
670 | let lhs = lhs?; |
671 | let rhs = rhs?; |
672 | |
673 | let start = lhs.span.start_pos(); |
674 | let end = rhs.span.end_pos(); |
675 | |
676 | Ok(ParserNode { |
677 | expr: ParserExpr::Choice(Box::new(lhs), Box::new(rhs)), |
678 | span: start.span(&end), |
679 | }) |
680 | } |
681 | _ => unreachable!("infix" ), |
682 | }; |
683 | |
684 | pratt.map_primary(term).map_infix(infix).parse(pairs) |
685 | } |
686 | |
/// Resolves the escape sequences in `string`.
///
/// Supported escapes are `\"`, `\\`, `\r`, `\n`, `\t`, `\0`, `\'`, `\xHH`
/// (exactly two hex digits) and `\u{H..}` (two to six hex digits). All other
/// characters, including any surrounding quotes, are copied unchanged.
///
/// Returns `None` if an escape is unknown, incomplete, contains anything other
/// than hex digits where digits are required, or denotes a value that is not a
/// valid `char`.
fn unescape(string: &str) -> Option<String> {
    // `from_str_radix` also accepts a leading `+`, so the digits are checked
    // explicitly before they are parsed.
    fn is_hex(digits: &str) -> bool {
        digits.bytes().all(|byte| byte.is_ascii_hexdigit())
    }

    let mut result = String::with_capacity(string.len());
    let mut chars = string.chars();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        match chars.next()? {
            '"' => result.push('"'),
            '\\' => result.push('\\'),
            'r' => result.push('\r'),
            'n' => result.push('\n'),
            't' => result.push('\t'),
            '0' => result.push('\0'),
            '\'' => result.push('\''),
            'x' => {
                let rest = chars.as_str();
                // `get` yields `None` when fewer than two bytes remain or when
                // the cut would split a multi-byte character.
                let digits = rest.get(..2).filter(|digits| is_hex(digits))?;
                let value = u8::from_str_radix(digits, 16).ok()?;

                result.push(char::from(value));
                chars = rest[2..].chars();
            }
            'u' => {
                let rest = chars.as_str().strip_prefix('{')?;
                let close = rest.find('}')?;
                let digits = &rest[..close];

                if digits.len() < 2 || 6 < digits.len() || !is_hex(digits) {
                    return None;
                }

                let value = u32::from_str_radix(digits, 16).ok()?;

                result.push(char::from_u32(value)?);
                // Continue right after the closing brace.
                chars = rest[close + 1..].chars();
            }
            _ => return None,
        }
    }

    Some(result)
}
742 | |
743 | #[cfg (test)] |
744 | mod tests { |
745 | use std::convert::TryInto; |
746 | |
747 | use super::super::unwrap_or_report; |
748 | use super::*; |
749 | |
750 | #[test ] |
751 | fn rules() { |
752 | parses_to! { |
753 | parser: PestParser, |
754 | input: "a = { b } c = { d }" , |
755 | rule: Rule::grammar_rules, |
756 | tokens: [ |
757 | grammar_rule(0, 9, [ |
758 | identifier(0, 1), |
759 | assignment_operator(2, 3), |
760 | opening_brace(4, 5), |
761 | expression(6, 8, [ |
762 | term(6, 8, [ |
763 | identifier(6, 7) |
764 | ]) |
765 | ]), |
766 | closing_brace(8, 9) |
767 | ]), |
768 | grammar_rule(10, 19, [ |
769 | identifier(10, 11), |
770 | assignment_operator(12, 13), |
771 | opening_brace(14, 15), |
772 | expression(16, 18, [ |
773 | term(16, 18, [ |
774 | identifier(16, 17) |
775 | ]) |
776 | ]), |
777 | closing_brace(18, 19) |
778 | ]) |
779 | ] |
780 | }; |
781 | } |
782 | |
783 | #[test ] |
784 | fn rule() { |
785 | parses_to! { |
786 | parser: PestParser, |
787 | input: "a = ! { b ~ c }" , |
788 | rule: Rule::grammar_rule, |
789 | tokens: [ |
790 | grammar_rule(0, 15, [ |
791 | identifier(0, 1), |
792 | assignment_operator(2, 3), |
793 | non_atomic_modifier(4, 5), |
794 | opening_brace(6, 7), |
795 | expression(8, 14, [ |
796 | term(8, 10, [ |
797 | identifier(8, 9) |
798 | ]), |
799 | sequence_operator(10, 11), |
800 | term(12, 14, [ |
801 | identifier(12, 13) |
802 | ]) |
803 | ]), |
804 | closing_brace(14, 15) |
805 | ]) |
806 | ] |
807 | }; |
808 | } |
809 | |
810 | #[test ] |
811 | fn expression() { |
812 | parses_to! { |
813 | parser: PestParser, |
814 | input: "_a | 'a'..'b' ~ !^ \"abc \" ~ (d | e)*?" , |
815 | rule: Rule::expression, |
816 | tokens: [ |
817 | expression(0, 35, [ |
818 | term(0, 3, [ |
819 | identifier(0, 2) |
820 | ]), |
821 | choice_operator(3, 4), |
822 | term(5, 14, [ |
823 | range(5, 13, [ |
824 | character(5, 8, [ |
825 | single_quote(5, 6), |
826 | inner_chr(6, 7), |
827 | single_quote(7, 8) |
828 | ]), |
829 | range_operator(8, 10), |
830 | character(10, 13, [ |
831 | single_quote(10, 11), |
832 | inner_chr(11, 12), |
833 | single_quote(12, 13) |
834 | ]) |
835 | ]) |
836 | ]), |
837 | sequence_operator(14, 15), |
838 | term(16, 24, [ |
839 | negative_predicate_operator(16, 17), |
840 | insensitive_string(17, 23, [ |
841 | string(18, 23, [ |
842 | quote(18, 19), |
843 | inner_str(19, 22), |
844 | quote(22, 23) |
845 | ]) |
846 | ]) |
847 | ]), |
848 | sequence_operator(24, 25), |
849 | term(26, 35, [ |
850 | opening_paren(26, 27), |
851 | expression(27, 32, [ |
852 | term(27, 29, [ |
853 | identifier(27, 28) |
854 | ]), |
855 | choice_operator(29, 30), |
856 | term(31, 32, [ |
857 | identifier(31, 32) |
858 | ]) |
859 | ]), |
860 | closing_paren(32, 33), |
861 | repeat_operator(33, 34), |
862 | optional_operator(34, 35) |
863 | ]) |
864 | ]) |
865 | ] |
866 | }; |
867 | } |
868 | |
869 | #[test ] |
870 | fn repeat_exact() { |
871 | parses_to! { |
872 | parser: PestParser, |
873 | input: "{1}" , |
874 | rule: Rule::repeat_exact, |
875 | tokens: [ |
876 | repeat_exact(0, 3, [ |
877 | opening_brace(0, 1), |
878 | number(1, 2), |
879 | closing_brace(2, 3) |
880 | ]) |
881 | ] |
882 | }; |
883 | } |
884 | |
885 | #[test ] |
886 | fn repeat_min() { |
887 | parses_to! { |
888 | parser: PestParser, |
889 | input: "{2,}" , |
890 | rule: Rule::repeat_min, |
891 | tokens: [ |
892 | repeat_min(0, 4, [ |
893 | opening_brace(0,1), |
894 | number(1,2), |
895 | comma(2,3), |
896 | closing_brace(3,4) |
897 | ]) |
898 | ] |
899 | } |
900 | } |
901 | |
902 | #[test ] |
903 | fn repeat_max() { |
904 | parses_to! { |
905 | parser: PestParser, |
906 | input: "{, 3}" , |
907 | rule: Rule::repeat_max, |
908 | tokens: [ |
909 | repeat_max(0, 5, [ |
910 | opening_brace(0,1), |
911 | comma(1,2), |
912 | number(3,4), |
913 | closing_brace(4,5) |
914 | ]) |
915 | ] |
916 | } |
917 | } |
918 | |
919 | #[test ] |
920 | fn repeat_min_max() { |
921 | parses_to! { |
922 | parser: PestParser, |
923 | input: "{1, 2}" , |
924 | rule: Rule::repeat_min_max, |
925 | tokens: [ |
926 | repeat_min_max(0, 6, [ |
927 | opening_brace(0, 1), |
928 | number(1, 2), |
929 | comma(2, 3), |
930 | number(4, 5), |
931 | closing_brace(5, 6) |
932 | ]) |
933 | ] |
934 | }; |
935 | } |
936 | |
937 | #[test ] |
938 | fn push() { |
939 | parses_to! { |
940 | parser: PestParser, |
941 | input: "PUSH ( a )" , |
942 | rule: Rule::_push, |
943 | tokens: [ |
944 | _push(0, 10, [ |
945 | opening_paren(5, 6), |
946 | expression(7, 9, [ |
947 | term(7, 9, [ |
948 | identifier(7, 8) |
949 | ]) |
950 | ]), |
951 | closing_paren(9, 10) |
952 | ]) |
953 | ] |
954 | }; |
955 | } |
956 | |
957 | #[test ] |
958 | fn peek_slice_all() { |
959 | parses_to! { |
960 | parser: PestParser, |
961 | input: "PEEK[..]" , |
962 | rule: Rule::peek_slice, |
963 | tokens: [ |
964 | peek_slice(0, 8, [ |
965 | opening_brack(4, 5), |
966 | range_operator(5, 7), |
967 | closing_brack(7, 8) |
968 | ]) |
969 | ] |
970 | }; |
971 | } |
972 | |
973 | #[test ] |
974 | fn peek_slice_start() { |
975 | parses_to! { |
976 | parser: PestParser, |
977 | input: "PEEK[1..]" , |
978 | rule: Rule::peek_slice, |
979 | tokens: [ |
980 | peek_slice(0, 9, [ |
981 | opening_brack(4, 5), |
982 | integer(5, 6), |
983 | range_operator(6, 8), |
984 | closing_brack(8, 9) |
985 | ]) |
986 | ] |
987 | }; |
988 | } |
989 | |
990 | #[test ] |
991 | fn peek_slice_end() { |
992 | parses_to! { |
993 | parser: PestParser, |
994 | input: "PEEK[ ..-1]" , |
995 | rule: Rule::peek_slice, |
996 | tokens: [ |
997 | peek_slice(0, 11, [ |
998 | opening_brack(4, 5), |
999 | range_operator(6, 8), |
1000 | integer(8, 10), |
1001 | closing_brack(10, 11) |
1002 | ]) |
1003 | ] |
1004 | }; |
1005 | } |
1006 | |
1007 | #[test ] |
1008 | fn peek_slice_start_end() { |
1009 | parses_to! { |
1010 | parser: PestParser, |
1011 | input: "PEEK[-5..10]" , |
1012 | rule: Rule::peek_slice, |
1013 | tokens: [ |
1014 | peek_slice(0, 12, [ |
1015 | opening_brack(4, 5), |
1016 | integer(5, 7), |
1017 | range_operator(7, 9), |
1018 | integer(9, 11), |
1019 | closing_brack(11, 12) |
1020 | ]) |
1021 | ] |
1022 | }; |
1023 | } |
1024 | |
1025 | #[test ] |
1026 | fn identifier() { |
1027 | parses_to! { |
1028 | parser: PestParser, |
1029 | input: "_a8943" , |
1030 | rule: Rule::identifier, |
1031 | tokens: [ |
1032 | identifier(0, 6) |
1033 | ] |
1034 | }; |
1035 | } |
1036 | |
1037 | #[test ] |
1038 | fn string() { |
1039 | parses_to! { |
1040 | parser: PestParser, |
1041 | input: " \"aaaaa \\n \\r \\t \\\\\\0 \\' \\\"\\x0F \\u{123abC} \\u{12}aaaaa \"" , |
1042 | rule: Rule::string, |
1043 | tokens: [ |
1044 | string(0, 46, [ |
1045 | quote(0, 1), |
1046 | inner_str(1, 45), |
1047 | quote(45, 46) |
1048 | ]) |
1049 | ] |
1050 | }; |
1051 | } |
1052 | |
/// A case-insensitive string: `^`, skipped whitespace, then an ordinary
/// string literal.
///
/// The nested `string` token is asserted at bytes 3..9 with a four-byte
/// `inner_str` (`\"hi`), so the caret is followed by exactly two blanks and
/// the closing quote comes directly after `hi`.
#[test]
fn insensitive_string() {
    parses_to! {
        parser: PestParser,
        input: "^  \"\\\"hi\"",
        rule: Rule::insensitive_string,
        tokens: [
            insensitive_string(0, 9, [
                string(3, 9, [
                    quote(3, 4),
                    inner_str(4, 8),
                    quote(8, 9)
                ])
            ])
        ]
    };
}
1070 | |
/// A character range whose bounds are both escapes: `'\n' .. '\x1a'`.
///
/// The character literals carry no inner padding: the asserted spans make the
/// first literal four bytes (`'\n'`) and the second six bytes (`'\x1a'`), for
/// a 14-byte input.
#[test]
fn range() {
    parses_to! {
        parser: PestParser,
        input: "'\\n' .. '\\x1a'",
        rule: Rule::range,
        tokens: [
            range(0, 14, [
                character(0, 4, [
                    single_quote(0, 1),
                    inner_chr(1, 3),
                    single_quote(3, 4)
                ]),
                range_operator(5, 7),
                character(8, 14, [
                    single_quote(8, 9),
                    inner_chr(9, 13),
                    single_quote(13, 14)
                ])
            ])
        ]
    };
}
1094 | |
/// A character literal holding a six-digit unicode escape.
///
/// `inner_chr` is asserted at bytes 1..11, i.e. the ten-byte escape
/// `\u{123abC}` directly between the two quotes (12 bytes in total).
#[test]
fn character() {
    parses_to! {
        parser: PestParser,
        input: "'\\u{123abC}'",
        rule: Rule::character,
        tokens: [
            character(0, 12, [
                single_quote(0, 1),
                inner_chr(1, 11),
                single_quote(11, 12)
            ])
        ]
    };
}
1110 | |
/// A number may carry a leading zero; all four digits form one `number` token.
#[test]
fn number() {
    let source = "0123";

    parses_to! {
        parser: PestParser,
        input: source,
        rule: Rule::number,
        tokens: [
            number(0, 4)
        ]
    };
}
1122 | |
/// A `//` line comment between the sequence operator and its right operand is
/// skipped and produces no token of its own.
///
/// The asserted spans fix the layout of the input: `a` at byte 0, `~` at
/// byte 2 and `b` at byte 16 of a 17-byte string, so bytes 3..16 hold the
/// padding, the comment and the line break.
#[test]
fn comment() {
    parses_to! {
        parser: PestParser,
        input: "a ~    // asda\n b",
        rule: Rule::expression,
        tokens: [
            expression(0, 17, [
                term(0, 2, [
                    identifier(0, 1)
                ]),
                sequence_operator(2, 3),
                term(16, 17, [
                    identifier(16, 17)
                ])
            ])
        ]
    };
}
1142 | |
/// A grammar-level `//!` doc, a rule-level `///` doc and a plain rule each
/// become their own top-level token.
///
/// Each doc line ends directly at its `\n` (the docs are asserted at bytes
/// 0..9 and 10..19) and the string literal is the three-byte `"a"` (26..29);
/// any extra blank would shift every offset checked below.
#[test]
fn grammar_doc_and_line_doc() {
    let input = "//! hello\n/// world\na = { \"a\" }";
    parses_to! {
        parser: PestParser,
        input: input,
        rule: Rule::grammar_rules,
        tokens: [
            grammar_doc(0, 9, [
                inner_doc(4, 9),
            ]),
            grammar_rule(10, 19, [
                line_doc(10, 19, [
                    inner_doc(14, 19),
                ]),
            ]),
            grammar_rule(20, 31, [
                identifier(20, 21),
                assignment_operator(22, 23),
                opening_brace(24, 25),
                expression(26, 30, [
                    term(26, 30, [
                        string(26, 29, [
                            quote(26, 27),
                            inner_str(27, 28),
                            quote(28, 29)
                        ])
                    ])
                ]),
                closing_brace(30, 31),
            ])
        ]
    };
}
1177 | |
/// A grammar cannot begin with a digit: parsing `0` fails at offset 0, where
/// end of input, a rule or a grammar doc would have been accepted.
#[test]
fn wrong_identifier() {
    let source = "0";
    let expected = vec![Rule::EOI, Rule::grammar_rule, Rule::grammar_doc];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 0
    };
}
1189 | |
/// `a {}` lacks the `=`: the failure is at offset 2 with only
/// `assignment_operator` expected.
#[test]
fn missing_assignment_operator() {
    let source = "a {}";
    let expected = vec![Rule::assignment_operator];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 2
    };
}
1201 | |
/// `*` is not a rule modifier: at offset 4 the parser accepts only the opening
/// brace or one of the four modifier tokens.
#[test]
fn wrong_modifier() {
    let source = "a = *{}";
    let expected = vec![
        Rule::opening_brace,
        Rule::silent_modifier,
        Rule::atomic_modifier,
        Rule::compound_atomic_modifier,
        Rule::non_atomic_modifier,
    ];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 4
    };
}
1219 | |
/// A modifier with nothing after it: `a = _` fails at the end of the input
/// (offset 5) expecting the opening brace.
#[test]
fn missing_opening_brace() {
    let source = "a = _";
    let expected = vec![Rule::opening_brace];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 5
    };
}
1231 | |
/// A rule body may not be empty: `a = {}` fails at offset 5 expecting an
/// `expression`.
#[test]
fn empty_rule() {
    let source = "a = {}";
    let expected = vec![Rule::expression];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 5
    };
}
1243 | |
/// A sequence operator without a right operand: the failure is at the closing
/// brace (offset 10), where a `term` was expected.
#[test]
fn missing_rhs() {
    let source = "a = { b ~ }";
    let expected = vec![Rule::term];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 10
    };
}
1255 | |
/// A body that starts with the infix `~`: the failure is at offset 6, where an
/// `expression` was expected.
#[test]
fn incorrect_prefix() {
    let source = "a = { ~ b}";
    let expected = vec![Rule::expression];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 6
    };
}
1267 | |
/// `%` is not an operator: after the term `b` (offset 8) only a brace, an
/// infix operator or a postfix operator is accepted.
#[test]
fn wrong_op() {
    let source = "a = { b % }";
    let expected = vec![
        Rule::opening_brace,
        Rule::closing_brace,
        Rule::sequence_operator,
        Rule::choice_operator,
        Rule::optional_operator,
        Rule::repeat_operator,
        Rule::repeat_once_operator,
    ];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 8
    };
}
1287 | |
/// An unclosed group: at the rule's closing brace (offset 9) the parser still
/// expects `)` or something that continues the parenthesised expression.
#[test]
fn missing_closing_paren() {
    let source = "a = { (b }";
    let expected = vec![
        Rule::opening_brace,
        Rule::closing_paren,
        Rule::sequence_operator,
        Rule::choice_operator,
        Rule::optional_operator,
        Rule::repeat_operator,
        Rule::repeat_once_operator,
    ];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 9
    };
}
1307 | |
/// A prefix `!` with no operand: at offset 8 every token that can start a term
/// is reported as expected.
#[test]
fn missing_term() {
    let source = "a = { ! }";
    let expected = vec![
        Rule::opening_paren,
        Rule::positive_predicate_operator,
        Rule::negative_predicate_operator,
        Rule::_push,
        Rule::peek_slice,
        Rule::identifier,
        Rule::insensitive_string,
        Rule::quote,
        Rule::single_quote,
    ];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 8
    };
}
1329 | |
/// An unterminated string literal: the failure is at the end of the input
/// (offset 9), where the closing `quote` was expected.
#[test]
fn string_missing_ending_quote() {
    let source = "a = { \" }";
    let expected = vec![Rule::quote];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 9
    };
}
1341 | |
/// `^` must be followed by a string literal: the failure is at offset 8
/// expecting the opening `quote`.
#[test]
fn insensitive_missing_string() {
    let source = "a = { ^ }";
    let expected = vec![Rule::quote];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 8
    };
}
1353 | |
/// An unterminated character literal: after the quote and one character the
/// closing `single_quote` is expected at offset 8.
#[test]
fn char_missing_ending_single_quote() {
    let source = "a = { \' }";
    let expected = vec![Rule::single_quote];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 8
    };
}
1365 | |
/// A complete character literal is only valid as the start of a range, so
/// `'a'` on its own fails at offset 10 expecting the `range_operator`.
///
/// The literal must be the closed three-byte `'a'` (bytes 6..9): only then
/// does the parser get past the character and fail at the brace at offset 10.
#[test]
fn range_missing_range_operator() {
    fails_with! {
        parser: PestParser,
        input: "a = { \'a\' }",
        rule: Rule::grammar_rules,
        positives: vec![Rule::range_operator],
        negatives: vec![],
        pos: 10
    };
}
1377 | |
/// `&` is not a postfix operator: directly after the term `a` (offset 7) only
/// a brace, an infix operator or a real postfix operator is accepted.
#[test]
fn wrong_postfix() {
    let source = "a = { a& }";
    let expected = vec![
        Rule::opening_brace,
        Rule::closing_brace,
        Rule::sequence_operator,
        Rule::choice_operator,
        Rule::optional_operator,
        Rule::repeat_operator,
        Rule::repeat_once_operator,
    ];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 7
    };
}
1397 | |
/// A tagged node `#a = a`: the tag id, the assignment operator and the tagged
/// identifier all sit inside a single `term`.
#[test]
fn node_tag() {
    let source = "#a = a";

    parses_to! {
        parser: PestParser,
        input: source,
        rule: Rule::expression,
        tokens: [
            expression(0, 6, [
                term(0, 6, [
                    tag_id(0, 2),
                    assignment_operator(3, 4),
                    identifier(5, 6)
                ])
            ])
        ]
    };
}
1415 | |
/// A bare `#` without a tag name: the failure is at offset 6, where an
/// `expression` was expected.
#[test]
fn incomplete_node_tag() {
    let source = "a = { # }";
    let expected = vec![Rule::expression];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 6
    };
}
1429 | |
/// A tag with nothing assigned to it: after `#a =` (offset 11) every token
/// that can start a term is reported as expected.
#[test]
fn incomplete_node_tag_assignment() {
    let source = "a = { #a = }";
    let expected = vec![
        Rule::opening_paren,
        Rule::positive_predicate_operator,
        Rule::negative_predicate_operator,
        Rule::_push,
        Rule::peek_slice,
        Rule::identifier,
        Rule::insensitive_string,
        Rule::quote,
        Rule::single_quote,
    ];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 11
    };
}
1451 | |
/// A tag assignment without the leading `#`: `a` parses as a plain term, so
/// the `=` at offset 8 is rejected where an operator or brace was expected.
#[test]
fn incomplete_node_tag_pound_key() {
    let source = "a = { a = a }";
    let expected = vec![
        Rule::opening_brace,
        Rule::closing_brace,
        Rule::sequence_operator,
        Rule::choice_operator,
        Rule::optional_operator,
        Rule::repeat_operator,
        Rule::repeat_once_operator,
    ];

    fails_with! {
        parser: PestParser,
        input: source,
        rule: Rule::grammar_rules,
        positives: expected,
        negatives: vec![],
        pos: 8
    };
}
1471 | |
/// Parses one silent rule that uses all four bounded repetition forms, a
/// negative predicate, `?` and `*`, a case-insensitive string, `PUSH` and a
/// character range, then compares the converted AST with a hand-built tree.
#[test]
fn ast() {
    let grammar = r##"
/// This is line comment
/// This is rule
rule = _{ a{1} ~ "a"{3,} ~ b{, 2} ~ "b"{1, 2} | !(^"c" | PUSH('d'..'e'))?* }
"##;

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    let actual: Vec<_> = consume_rules_with_spans(parsed)
        .unwrap()
        .into_iter()
        .map(convert_rule)
        .collect();

    // Left side of the choice: the four repetitions chained into a
    // left-nested sequence, Seq(Seq(Seq(r1, r2), r3), r4).
    let first_repetition = Expr::RepExact(Box::new(Expr::Ident("a".to_owned())), 1);
    let later_repetitions = vec![
        Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 3),
        Expr::RepMax(Box::new(Expr::Ident("b".to_owned())), 2),
        Expr::RepMinMax(Box::new(Expr::Str("b".to_owned())), 1, 2),
    ];
    let sequence = later_repetitions
        .into_iter()
        .fold(first_repetition, |lhs, rhs| {
            Expr::Seq(Box::new(lhs), Box::new(rhs))
        });

    // Right side of the choice: `!( ... )?*` nests as NegPred(Rep(Opt(..))).
    let pushed_range = Expr::Push(Box::new(Expr::Range("d".to_owned(), "e".to_owned())));
    let group = Expr::Choice(
        Box::new(Expr::Insens("c".to_owned())),
        Box::new(pushed_range),
    );
    let optional = Expr::Opt(Box::new(group));
    let repeated = Expr::Rep(Box::new(optional));
    let predicate = Expr::NegPred(Box::new(repeated));

    let expected = vec![AstRule {
        name: "rule".to_owned(),
        ty: RuleType::Silent,
        expr: Expr::Choice(Box::new(sequence), Box::new(predicate)),
    }];

    assert_eq!(actual, expected);
}
1513 | |
/// `PEEK[-04..]` converts to a slice starting at -4 with no end, and
/// `PEEK[..3]` to a slice starting at 0 and ending at 3.
#[test]
fn ast_peek_slice() {
    let grammar = "rule = _{ PEEK[-04..] ~ PEEK[..3] }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    let actual: Vec<_> = consume_rules_with_spans(parsed)
        .unwrap()
        .into_iter()
        .map(convert_rule)
        .collect();

    let open_ended = Expr::PeekSlice(-4, None);
    let bounded = Expr::PeekSlice(0, Some(3));
    let expected = vec![AstRule {
        name: "rule".to_owned(),
        ty: RuleType::Silent,
        expr: Expr::Seq(Box::new(open_ended), Box::new(bounded)),
    }];

    assert_eq!(actual, expected);
}
1534 | |
/// An exact repetition count that does not fit in `u32` is reported as an
/// overflow, with the whole number underlined.
///
/// The expected report is written with explicit `\n` escapes so that its
/// leading whitespace is part of the literal: the gutter is indented and the
/// underline starts under column 13, matching the `1:13` header.
#[test]
#[should_panic(
    expected = "grammar error\n\n --> 1:13\n  |\n1 | rule = { \"\"{4294967297} }\n  |             ^--------^\n  |\n  = number cannot overflow u32"
)]
fn repeat_exact_overflow() {
    let grammar = "rule = { \"\"{4294967297} }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    unwrap_or_report(consume_rules_with_spans(parsed));
}
1550 | |
/// An exact repetition count of zero is rejected with "cannot repeat 0 times".
///
/// The expected report is written with explicit `\n` escapes so that its
/// leading whitespace is part of the literal: the gutter is indented and the
/// caret sits under column 13, matching the `1:13` header.
#[test]
#[should_panic(
    expected = "grammar error\n\n --> 1:13\n  |\n1 | rule = { \"\"{0} }\n  |             ^\n  |\n  = cannot repeat 0 times"
)]
fn repeat_exact_zero() {
    let grammar = "rule = { \"\"{0} }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    unwrap_or_report(consume_rules_with_spans(parsed));
}
1566 | |
/// A minimum repetition count that does not fit in `u32` is reported as an
/// overflow, with the whole number underlined.
///
/// The expected report is written with explicit `\n` escapes so that its
/// leading whitespace is part of the literal: the gutter is indented and the
/// underline starts under column 13, matching the `1:13` header.
#[test]
#[should_panic(
    expected = "grammar error\n\n --> 1:13\n  |\n1 | rule = { \"\"{4294967297,} }\n  |             ^--------^\n  |\n  = number cannot overflow u32"
)]
fn repeat_min_overflow() {
    let grammar = "rule = { \"\"{4294967297,} }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    unwrap_or_report(consume_rules_with_spans(parsed));
}
1582 | |
/// A maximum repetition count that does not fit in `u32` is reported as an
/// overflow, with the whole number underlined.
///
/// The expected report is written with explicit `\n` escapes so that its
/// leading whitespace is part of the literal: the gutter is indented and the
/// underline starts under column 14, matching the `1:14` header.
#[test]
#[should_panic(
    expected = "grammar error\n\n --> 1:14\n  |\n1 | rule = { \"\"{,4294967297} }\n  |              ^--------^\n  |\n  = number cannot overflow u32"
)]
fn repeat_max_overflow() {
    let grammar = "rule = { \"\"{,4294967297} }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    unwrap_or_report(consume_rules_with_spans(parsed));
}
1598 | |
/// A maximum repetition count of zero is rejected with "cannot repeat 0 times".
///
/// The expected report is written with explicit `\n` escapes so that its
/// leading whitespace is part of the literal: the gutter is indented and the
/// caret sits under column 14, matching the `1:14` header.
#[test]
#[should_panic(
    expected = "grammar error\n\n --> 1:14\n  |\n1 | rule = { \"\"{,0} }\n  |              ^\n  |\n  = cannot repeat 0 times"
)]
fn repeat_max_zero() {
    let grammar = "rule = { \"\"{,0} }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    unwrap_or_report(consume_rules_with_spans(parsed));
}
1614 | |
/// In a `{min,max}` repetition the overflow is reported for the first bound
/// (column 13), with that number underlined.
///
/// The expected report is written with explicit `\n` escapes so that its
/// leading whitespace is part of the literal: the gutter is indented and the
/// underline starts under column 13, matching the `1:13` header.
#[test]
#[should_panic(
    expected = "grammar error\n\n --> 1:13\n  |\n1 | rule = { \"\"{4294967297,4294967298} }\n  |             ^--------^\n  |\n  = number cannot overflow u32"
)]
fn repeat_min_max_overflow() {
    let grammar = "rule = { \"\"{4294967297,4294967298} }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    unwrap_or_report(consume_rules_with_spans(parsed));
}
1630 | |
/// In `{0,0}` the error is reported at the second bound (column 15) with
/// "cannot repeat 0 times".
///
/// The expected report is written with explicit `\n` escapes so that its
/// leading whitespace is part of the literal: the gutter is indented and the
/// caret sits under column 15, matching the `1:15` header.
#[test]
#[should_panic(
    expected = "grammar error\n\n --> 1:15\n  |\n1 | rule = { \"\"{0,0} }\n  |               ^\n  |\n  = cannot repeat 0 times"
)]
fn repeat_min_max_zero() {
    let grammar = "rule = { \"\"{0,0} }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    unwrap_or_report(consume_rules_with_spans(parsed));
}
1646 | |
/// A string mixing plain characters with a simple escape, a byte escape and a
/// unicode escape is fully unescaped.
///
/// The raw input contains no blanks, so the expected value contains none
/// either: each escape is replaced in place by the character it denotes.
#[test]
fn unescape_all() {
    let string = r"a\nb\x55c\u{111}d";

    assert_eq!(unescape(string), Some("a\nb\x55c\u{111}d".to_owned()));
}
1653 | |
/// A lone backslash with nothing after it is rejected.
#[test]
fn unescape_empty_escape() {
    let lone_backslash = r"\";

    assert!(unescape(lone_backslash).is_none());
}
1660 | |
/// `\w` is not a recognised escape and is rejected.
#[test]
fn unescape_wrong_escape() {
    let unknown_escape = r"\w";

    assert!(unescape(unknown_escape).is_none());
}
1667 | |
/// An escaped backslash becomes a single backslash; the leading blank is
/// passed through unchanged.
#[test]
fn unescape_backslash() {
    let escaped = " \\\\";
    let expected = " \\";

    assert_eq!(unescape(escaped).as_deref(), Some(expected));
}
1673 | |
/// `\r` becomes a carriage return; the leading blank is passed through
/// unchanged.
#[test]
fn unescape_return() {
    let escaped = " \\r";
    let expected = " \r";

    assert_eq!(unescape(escaped).as_deref(), Some(expected));
}
1679 | |
/// `\t` becomes a tab; the leading blank is passed through unchanged.
#[test]
fn unescape_tab() {
    let escaped = " \\t";
    let expected = " \t";

    assert_eq!(unescape(escaped).as_deref(), Some(expected));
}
1685 | |
/// `\0` becomes a NUL character; the leading blank is passed through
/// unchanged.
#[test]
fn unescape_null() {
    let escaped = " \\0";
    let expected = " \0";

    assert_eq!(unescape(escaped).as_deref(), Some(expected));
}
1691 | |
/// `\'` becomes a single quote; the leading blank is passed through
/// unchanged.
#[test]
fn unescape_single_quote() {
    let escaped = " \\'";
    let expected = " \'";

    assert_eq!(unescape(escaped).as_deref(), Some(expected));
}
1697 | |
/// A byte escape with a non-hexadecimal digit (`\xfg`) is rejected.
#[test]
fn unescape_wrong_byte() {
    let bad_hex = r"\xfg";

    assert!(unescape(bad_hex).is_none());
}
1704 | |
/// A byte escape with only one hexadecimal digit (`\xf`) is rejected.
#[test]
fn unescape_short_byte() {
    let one_digit = r"\xf";

    assert!(unescape(one_digit).is_none());
}
1711 | |
/// A unicode escape without the opening brace (`\u11`) is rejected.
#[test]
fn unescape_no_open_brace_unicode() {
    let unbraced = r"\u11";

    assert!(unescape(unbraced).is_none());
}
1718 | |
/// A unicode escape without the closing brace (`\u{11`) is rejected.
#[test]
fn unescape_no_close_brace_unicode() {
    let unclosed = r"\u{11";

    assert!(unescape(unclosed).is_none());
}
1725 | |
/// A unicode escape with a single hexadecimal digit (`\u{1}`) is rejected.
#[test]
fn unescape_short_unicode() {
    let one_digit = r"\u{1}";

    assert!(unescape(one_digit).is_none());
}
1732 | |
/// A unicode escape with seven hexadecimal digits (`\u{1111111}`) is rejected.
#[test]
fn unescape_long_unicode() {
    let seven_digits = r"\u{1111111}";

    assert!(unescape(seven_digits).is_none());
}
1739 | |
/// With a call limit of 5000, each of the five bundled fuzz-sample grammars
/// must fail to parse with the message "call limit reached".
///
/// `pest::set_call_limit` is called without any parser handle, so the limit
/// is presumably process-wide (confirm against the pest docs). A drop guard
/// lifts it again when the test ends, including on assertion failure, so the
/// cap is not left in place for later tests. NOTE(review): this assumes no
/// limit was configured before the test started.
#[test]
fn handles_deep_nesting() {
    /// Removes the call limit when dropped.
    struct CallLimitReset;

    impl Drop for CallLimitReset {
        fn drop(&mut self) {
            pest::set_call_limit(None);
        }
    }

    const ERROR: &str = "call limit reached";

    let samples = [
        include_str!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/resources/test/fuzzsample1.grammar"
        )),
        include_str!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/resources/test/fuzzsample2.grammar"
        )),
        include_str!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/resources/test/fuzzsample3.grammar"
        )),
        include_str!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/resources/test/fuzzsample4.grammar"
        )),
        include_str!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/resources/test/fuzzsample5.grammar"
        )),
    ];

    pest::set_call_limit(Some(5_000usize.try_into().unwrap()));
    let _reset = CallLimitReset;

    for (index, sample) in samples.iter().enumerate() {
        // Samples are numbered from 1 to match their file names.
        let number = index + 1;
        let outcome = parse(Rule::grammar_rules, sample);
        assert!(
            outcome.is_err(),
            "fuzz sample {} parsed successfully",
            number
        );
        assert_eq!(
            outcome.unwrap_err().variant.message(),
            ERROR,
            "fuzz sample {}",
            number
        );
    }
}
1780 | } |
1781 | |