1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::borrow::{Cow, ToOwned};
11use alloc::boxed::Box;
12use alloc::rc::Rc;
13use alloc::vec;
14use alloc::vec::Vec;
15use core::num::NonZeroUsize;
16use core::ops::Range;
17use core::sync::atomic::{AtomicUsize, Ordering};
18
19use crate::error::{Error, ErrorVariant};
20use crate::iterators::{pairs, QueueableToken};
21use crate::position::{self, Position};
22use crate::span::Span;
23use crate::stack::Stack;
24use crate::RuleType;
25
26/// The current lookahead status of a [`ParserState`].
27///
28/// [`ParserState`]: struct.ParserState.html
29#[derive(Clone, Copy, Debug, Eq, PartialEq)]
30pub enum Lookahead {
31 /// The positive predicate, written as an ampersand &,
32 /// attempts to match its inner expression.
33 /// If the inner expression succeeds, parsing continues,
34 /// but at the same position as the predicate —
35 /// &foo ~ bar is thus a kind of "AND" statement:
36 /// "the input string must match foo AND bar".
37 /// If the inner expression fails,
38 /// the whole expression fails too.
39 Positive,
40 /// The negative predicate, written as an exclamation mark !,
41 /// attempts to match its inner expression.
42 /// If the inner expression fails, the predicate succeeds
43 /// and parsing continues at the same position as the predicate.
44 /// If the inner expression succeeds, the predicate fails —
45 /// !foo ~ bar is thus a kind of "NOT" statement:
46 /// "the input string must match bar but NOT foo".
47 Negative,
48 /// No lookahead (i.e. it will consume input).
49 None,
50}
51
52/// The current atomicity of a [`ParserState`].
53///
54/// [`ParserState`]: struct.ParserState.html
55#[derive(Clone, Copy, Debug, Eq, PartialEq)]
56pub enum Atomicity {
57 /// prevents implicit whitespace: inside an atomic rule,
58 /// the tilde ~ means "immediately followed by",
59 /// and repetition operators (asterisk * and plus sign +)
60 /// have no implicit separation. In addition, all other rules
61 /// called from an atomic rule are also treated as atomic.
62 /// (interior matching rules are silent)
63 Atomic,
64 /// The same as atomic, but inner tokens are produced as normal.
65 CompoundAtomic,
66 /// implicit whitespace is enabled
67 NonAtomic,
68}
69
70/// Type alias to simplify specifying the return value of chained closures.
71pub type ParseResult<S> = Result<S, S>;
72
73/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
74#[derive(Clone, Copy, Debug, Eq, PartialEq)]
75pub enum MatchDir {
76 /// from the bottom to the top of the stack
77 BottomToTop,
78 /// from the top to the bottom of the stack
79 TopToBottom,
80}
81
82static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
83
84/// Sets the maximum call limit for the parser state
85/// to prevent stack overflows or excessive execution times
86/// in some grammars.
87/// If set, the calls are tracked as a running total
88/// over all non-terminal rules that can nest closures
89/// (which are passed to transform the parser state).
90///
91/// # Arguments
92///
93/// * `limit` - The maximum number of calls. If None,
94/// the number of calls is unlimited.
95pub fn set_call_limit(limit: Option<NonZeroUsize>) {
96 CALL_LIMIT.store(val:limit.map(|f| f.get()).unwrap_or(0), order:Ordering::Relaxed);
97}
98
99#[derive(Debug)]
100struct CallLimitTracker {
101 current_call_limit: Option<(usize, usize)>,
102}
103
104impl Default for CallLimitTracker {
105 fn default() -> Self {
106 let limit: usize = CALL_LIMIT.load(order:Ordering::Relaxed);
107 let current_call_limit: Option<(usize, usize)> = if limit > 0 { Some((0, limit)) } else { None };
108 Self { current_call_limit }
109 }
110}
111
112impl CallLimitTracker {
113 fn limit_reached(&self) -> bool {
114 self.current_call_limit
115 .map_or(default:false, |(current: usize, limit: usize)| current >= limit)
116 }
117
118 fn increment_depth(&mut self) {
119 if let Some((current: &mut usize, _)) = &mut self.current_call_limit {
120 *current += 1;
121 }
122 }
123}
124
125/// The complete state of a [`Parser`].
126///
127/// [`Parser`]: trait.Parser.html
128#[derive(Debug)]
129pub struct ParserState<'i, R: RuleType> {
130 position: Position<'i>,
131 queue: Vec<QueueableToken<'i, R>>,
132 lookahead: Lookahead,
133 pos_attempts: Vec<R>,
134 neg_attempts: Vec<R>,
135 attempt_pos: usize,
136 atomicity: Atomicity,
137 stack: Stack<Span<'i>>,
138 call_tracker: CallLimitTracker,
139}
140
141/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
142///
143/// # Examples
144///
145/// ```
146/// # use pest;
147/// let input = "";
148/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
149/// ```
150#[allow(clippy::perf)]
151pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
152where
153 F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
154{
155 let state = ParserState::new(input);
156
157 match f(state) {
158 Ok(state) => {
159 let len = state.queue.len();
160 Ok(pairs::new(Rc::new(state.queue), input, None, 0, len))
161 }
162 Err(mut state) => {
163 let variant = if state.reached_call_limit() {
164 ErrorVariant::CustomError {
165 message: "call limit reached".to_owned(),
166 }
167 } else {
168 state.pos_attempts.sort();
169 state.pos_attempts.dedup();
170 state.neg_attempts.sort();
171 state.neg_attempts.dedup();
172 ErrorVariant::ParsingError {
173 positives: state.pos_attempts.clone(),
174 negatives: state.neg_attempts.clone(),
175 }
176 };
177
178 Err(Error::new_from_pos(
179 variant,
180 // TODO(performance): Guarantee state.attempt_pos is a valid position
181 position::Position::new(input, state.attempt_pos).unwrap(),
182 ))
183 }
184 }
185}
186
187impl<'i, R: RuleType> ParserState<'i, R> {
188 /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
189 /// will be passed from closure to closure based on the needs of the specified `Parser`.
190 ///
191 /// # Examples
192 ///
193 /// ```
194 /// # use pest;
195 /// let input = "";
196 /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
197 /// ```
198 pub fn new(input: &'i str) -> Box<Self> {
199 Box::new(ParserState {
200 position: Position::from_start(input),
201 queue: vec![],
202 lookahead: Lookahead::None,
203 pos_attempts: vec![],
204 neg_attempts: vec![],
205 attempt_pos: 0,
206 atomicity: Atomicity::NonAtomic,
207 stack: Stack::new(),
208 call_tracker: Default::default(),
209 })
210 }
211
212 /// Returns a reference to the current `Position` of the `ParserState`.
213 ///
214 /// # Examples
215 ///
216 /// ```
217 /// # use pest;
218 /// # #[allow(non_camel_case_types)]
219 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
220 /// enum Rule {
221 /// ab
222 /// }
223 ///
224 /// let input = "ab";
225 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
226 /// let position = state.position();
227 /// assert_eq!(position.pos(), 0);
228 /// ```
229 pub fn position(&self) -> &Position<'i> {
230 &self.position
231 }
232
233 /// Returns the current atomicity of the `ParserState`.
234 ///
235 /// # Examples
236 ///
237 /// ```
238 /// # use pest;
239 /// # use pest::Atomicity;
240 /// # #[allow(non_camel_case_types)]
241 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
242 /// enum Rule {
243 /// ab
244 /// }
245 ///
246 /// let input = "ab";
247 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
248 /// let atomicity = state.atomicity();
249 /// assert_eq!(atomicity, Atomicity::NonAtomic);
250 /// ```
251 pub fn atomicity(&self) -> Atomicity {
252 self.atomicity
253 }
254
255 #[inline]
256 fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
257 if self.call_tracker.limit_reached() {
258 return Err(self);
259 }
260 self.call_tracker.increment_depth();
261 Ok(self)
262 }
263
264 #[inline]
265 fn reached_call_limit(&self) -> bool {
266 self.call_tracker.limit_reached()
267 }
268
269 /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
270 /// meant to match the rule.
271 ///
272 /// # Examples
273 ///
274 /// ```
275 /// # use pest;
276 /// # #[allow(non_camel_case_types)]
277 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
278 /// enum Rule {
279 /// a
280 /// }
281 ///
282 /// let input = "a";
283 /// let pairs: Vec<_> = pest::state(input, |state| {
284 /// state.rule(Rule::a, |s| Ok(s))
285 /// }).unwrap().collect();
286 ///
287 /// assert_eq!(pairs.len(), 1);
288 /// ```
289 #[inline]
290 pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
291 where
292 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
293 {
294 self = self.inc_call_check_limit()?;
295 let actual_pos = self.position.pos();
296 let index = self.queue.len();
297
298 let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
299 (self.pos_attempts.len(), self.neg_attempts.len())
300 } else {
301 // Attempts have not been cleared yet since the attempt_pos is older.
302 (0, 0)
303 };
304
305 if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
306 // Pair's position will only be known after running the closure.
307 self.queue.push(QueueableToken::Start {
308 end_token_index: 0,
309 input_pos: actual_pos,
310 });
311 }
312
313 let attempts = self.attempts_at(actual_pos);
314
315 let result = f(self);
316
317 match result {
318 Ok(mut new_state) => {
319 if new_state.lookahead == Lookahead::Negative {
320 new_state.track(
321 rule,
322 actual_pos,
323 pos_attempts_index,
324 neg_attempts_index,
325 attempts,
326 );
327 }
328
329 if new_state.lookahead == Lookahead::None
330 && new_state.atomicity != Atomicity::Atomic
331 {
332 // Storing the pair's index in the first token that was added before the closure was
333 // run.
334 let new_index = new_state.queue.len();
335 match new_state.queue[index] {
336 QueueableToken::Start {
337 ref mut end_token_index,
338 ..
339 } => *end_token_index = new_index,
340 _ => unreachable!(),
341 };
342
343 let new_pos = new_state.position.pos();
344
345 new_state.queue.push(QueueableToken::End {
346 start_token_index: index,
347 rule,
348 tag: None,
349 input_pos: new_pos,
350 });
351 }
352
353 Ok(new_state)
354 }
355 Err(mut new_state) => {
356 if new_state.lookahead != Lookahead::Negative {
357 new_state.track(
358 rule,
359 actual_pos,
360 pos_attempts_index,
361 neg_attempts_index,
362 attempts,
363 );
364 }
365
366 if new_state.lookahead == Lookahead::None
367 && new_state.atomicity != Atomicity::Atomic
368 {
369 new_state.queue.truncate(index);
370 }
371
372 Err(new_state)
373 }
374 }
375 }
376
377 /// Tag current node
378 ///
379 /// # Examples
380 ///
381 /// Try to recognize the one specified in a set of characters
382 ///
383 /// ```
384 /// use pest::{state, ParseResult, ParserState, iterators::Pair};
385 /// #[allow(non_camel_case_types)]
386 /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
387 /// enum Rule {
388 /// character,
389 /// }
390 /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
391 /// state.sequence(|state| {
392 /// character(state)
393 /// .and_then(|state| character(state))
394 /// .and_then(|state| character(state))
395 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("c")))
396 /// .and_then(|state| character(state))
397 /// })
398 /// }
399 /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
400 /// state.rule(Rule::character, |state| state.match_range('a'..'z'))
401 /// }
402 ///
403 /// let input = "abcd";
404 /// let pairs = state(input, mark_c).unwrap();
405 /// // find all node tag as `c`
406 /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
407 /// assert_eq!(find[0].as_str(), "c")
408 /// ```
409 #[inline]
410 pub fn tag_node(mut self: Box<Self>, tag: Cow<'i, str>) -> ParseResult<Box<Self>> {
411 if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
412 *old = Some(tag)
413 }
414 Ok(self)
415 }
416
417 fn attempts_at(&self, pos: usize) -> usize {
418 if self.attempt_pos == pos {
419 self.pos_attempts.len() + self.neg_attempts.len()
420 } else {
421 0
422 }
423 }
424
425 fn track(
426 &mut self,
427 rule: R,
428 pos: usize,
429 pos_attempts_index: usize,
430 neg_attempts_index: usize,
431 prev_attempts: usize,
432 ) {
433 if self.atomicity == Atomicity::Atomic {
434 return;
435 }
436
437 // If nested rules made no progress, there is no use to report them; it's only useful to
438 // track the current rule, the exception being when only one attempt has been made during
439 // the children rules.
440 let curr_attempts = self.attempts_at(pos);
441 if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
442 return;
443 }
444
445 if pos == self.attempt_pos {
446 self.pos_attempts.truncate(pos_attempts_index);
447 self.neg_attempts.truncate(neg_attempts_index);
448 }
449
450 if pos > self.attempt_pos {
451 self.pos_attempts.clear();
452 self.neg_attempts.clear();
453 self.attempt_pos = pos;
454 }
455
456 let attempts = if self.lookahead != Lookahead::Negative {
457 &mut self.pos_attempts
458 } else {
459 &mut self.neg_attempts
460 };
461
462 if pos == self.attempt_pos {
463 attempts.push(rule);
464 }
465 }
466
467 /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
468 /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
469 /// `Box<ParserState>` otherwise.
470 ///
471 /// This method is useful to parse sequences that only match together which usually come in the
472 /// form of chained `Result`s with
473 /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
474 ///
475 ///
476 /// # Examples
477 ///
478 /// ```
479 /// # use pest;
480 /// # #[allow(non_camel_case_types)]
481 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
482 /// enum Rule {
483 /// a
484 /// }
485 ///
486 /// let input = "a";
487 /// let pairs: Vec<_> = pest::state(input, |state| {
488 /// state.sequence(|s| {
489 /// s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
490 /// s.match_string("b")
491 /// })
492 /// }).or_else(|s| {
493 /// Ok(s)
494 /// })
495 /// }).unwrap().collect();
496 ///
497 /// assert_eq!(pairs.len(), 0);
498 /// ```
499 #[inline]
500 pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
501 where
502 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
503 {
504 self = self.inc_call_check_limit()?;
505 let token_index = self.queue.len();
506 let initial_pos = self.position;
507
508 let result = f(self);
509
510 match result {
511 Ok(new_state) => Ok(new_state),
512 Err(mut new_state) => {
513 // Restore the initial position and truncate the token queue.
514 new_state.position = initial_pos;
515 new_state.queue.truncate(token_index);
516 Err(new_state)
517 }
518 }
519 }
520
521 /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns
522 /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`.
523 ///
524 /// # Examples
525 ///
526 /// ```
527 /// # use pest;
528 /// # #[allow(non_camel_case_types)]
529 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
530 /// enum Rule {
531 /// ab
532 /// }
533 ///
534 /// let input = "aab";
535 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
536 /// let mut result = state.repeat(|s| {
537 /// s.match_string("a")
538 /// });
539 /// assert!(result.is_ok());
540 /// assert_eq!(result.unwrap().position().pos(), 2);
541 ///
542 /// state = pest::ParserState::new(input);
543 /// result = state.repeat(|s| {
544 /// s.match_string("b")
545 /// });
546 /// assert!(result.is_ok());
547 /// assert_eq!(result.unwrap().position().pos(), 0);
548 /// ```
549 #[inline]
550 pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
551 where
552 F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
553 {
554 self = self.inc_call_check_limit()?;
555 let mut result = f(self);
556
557 loop {
558 match result {
559 Ok(state) => result = f(state),
560 Err(state) => return Ok(state),
561 };
562 }
563 }
564
565 /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
566 /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
567 ///
568 /// # Examples
569 ///
570 /// ```
571 /// # use pest;
572 /// # #[allow(non_camel_case_types)]
573 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
574 /// enum Rule {
575 /// ab
576 /// }
577 ///
578 /// let input = "ab";
579 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
580 /// let result = state.optional(|s| {
581 /// s.match_string("ab")
582 /// });
583 /// assert!(result.is_ok());
584 ///
585 /// state = pest::ParserState::new(input);
586 /// let result = state.optional(|s| {
587 /// s.match_string("ac")
588 /// });
589 /// assert!(result.is_ok());
590 /// ```
591 #[inline]
592 pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
593 where
594 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
595 {
596 self = self.inc_call_check_limit()?;
597 match f(self) {
598 Ok(state) | Err(state) => Ok(state),
599 }
600 }
601
602 /// Attempts to match a single character based on a filter function. Returns `Ok` with the
603 /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
604 /// otherwise.
605 ///
606 /// # Examples
607 ///
608 /// ```
609 /// # use pest;
610 /// # #[allow(non_camel_case_types)]
611 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
612 /// enum Rule {}
613 ///
614 /// let input = "ab";
615 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
616 /// let result = state.match_char_by(|c| c.is_ascii());
617 /// assert!(result.is_ok());
618 /// assert_eq!(result.unwrap().position().pos(), 1);
619 ///
620 /// let input = "❤";
621 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
622 /// let result = state.match_char_by(|c| c.is_ascii());
623 /// assert!(result.is_err());
624 /// assert_eq!(result.unwrap_err().position().pos(), 0);
625 /// ```
626 #[inline]
627 pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
628 where
629 F: FnOnce(char) -> bool,
630 {
631 if self.position.match_char_by(f) {
632 Ok(self)
633 } else {
634 Err(self)
635 }
636 }
637
638 /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
639 /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
640 ///
641 /// # Examples
642 ///
643 /// ```
644 /// # use pest;
645 /// # #[allow(non_camel_case_types)]
646 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
647 /// enum Rule {}
648 ///
649 /// let input = "ab";
650 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
651 /// let mut result = state.match_string("ab");
652 /// assert!(result.is_ok());
653 /// assert_eq!(result.unwrap().position().pos(), 2);
654 ///
655 /// state = pest::ParserState::new(input);
656 /// result = state.match_string("ac");
657 /// assert!(result.is_err());
658 /// assert_eq!(result.unwrap_err().position().pos(), 0);
659 /// ```
660 #[inline]
661 pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
662 if self.position.match_string(string) {
663 Ok(self)
664 } else {
665 Err(self)
666 }
667 }
668
669 /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
670 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
671 ///
672 /// # Examples
673 ///
674 /// ```
675 /// # use pest;
676 /// # #[allow(non_camel_case_types)]
677 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
678 /// enum Rule {}
679 ///
680 /// let input = "ab";
681 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
682 /// let mut result = state.match_insensitive("AB");
683 /// assert!(result.is_ok());
684 /// assert_eq!(result.unwrap().position().pos(), 2);
685 ///
686 /// state = pest::ParserState::new(input);
687 /// result = state.match_insensitive("AC");
688 /// assert!(result.is_err());
689 /// assert_eq!(result.unwrap_err().position().pos(), 0);
690 /// ```
691 #[inline]
692 pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
693 if self.position.match_insensitive(string) {
694 Ok(self)
695 } else {
696 Err(self)
697 }
698 }
699
700 /// Attempts to match a single character from the given range. Returns `Ok` with the updated
701 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
702 ///
703 /// # Caution
704 /// The provided `range` is intepreted as inclusive.
705 ///
706 /// # Examples
707 ///
708 /// ```
709 /// # use pest;
710 /// # #[allow(non_camel_case_types)]
711 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
712 /// enum Rule {}
713 ///
714 /// let input = "ab";
715 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
716 /// let mut result = state.match_range('a'..'z');
717 /// assert!(result.is_ok());
718 /// assert_eq!(result.unwrap().position().pos(), 1);
719 ///
720 /// state = pest::ParserState::new(input);
721 /// result = state.match_range('A'..'Z');
722 /// assert!(result.is_err());
723 /// assert_eq!(result.unwrap_err().position().pos(), 0);
724 /// ```
725 #[inline]
726 pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
727 if self.position.match_range(range) {
728 Ok(self)
729 } else {
730 Err(self)
731 }
732 }
733
734 /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
735 /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
736 ///
737 /// # Examples
738 ///
739 /// ```
740 /// # use pest;
741 /// # #[allow(non_camel_case_types)]
742 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
743 /// enum Rule {}
744 ///
745 /// let input = "ab";
746 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
747 /// let mut result = state.skip(1);
748 /// assert!(result.is_ok());
749 /// assert_eq!(result.unwrap().position().pos(), 1);
750 ///
751 /// state = pest::ParserState::new(input);
752 /// result = state.skip(3);
753 /// assert!(result.is_err());
754 /// assert_eq!(result.unwrap_err().position().pos(), 0);
755 /// ```
756 #[inline]
757 pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
758 if self.position.skip(n) {
759 Ok(self)
760 } else {
761 Err(self)
762 }
763 }
764
765 /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
766 /// updated `Box<ParserState>` whether or not one of the strings is found.
767 ///
768 /// # Examples
769 ///
770 /// ```
771 /// # use pest;
772 /// # #[allow(non_camel_case_types)]
773 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
774 /// enum Rule {}
775 ///
776 /// let input = "abcd";
777 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
778 /// let mut result = state.skip_until(&["c", "d"]);
779 /// assert!(result.is_ok());
780 /// assert_eq!(result.unwrap().position().pos(), 2);
781 /// ```
782 #[inline]
783 pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
784 self.position.skip_until(strings);
785 Ok(self)
786 }
787
788 /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
789 /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
790 ///
791 /// # Examples
792 ///
793 /// ```
794 /// # use pest;
795 /// # #[allow(non_camel_case_types)]
796 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
797 /// enum Rule {}
798 ///
799 /// let input = "ab";
800 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
801 /// let mut result = state.start_of_input();
802 /// assert!(result.is_ok());
803 ///
804 /// state = pest::ParserState::new(input);
805 /// state = state.match_string("ab").unwrap();
806 /// result = state.start_of_input();
807 /// assert!(result.is_err());
808 /// ```
809 #[inline]
810 pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
811 if self.position.at_start() {
812 Ok(self)
813 } else {
814 Err(self)
815 }
816 }
817
818 /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
819 /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
820 ///
821 /// # Examples
822 ///
823 /// ```
824 /// # use pest;
825 /// # #[allow(non_camel_case_types)]
826 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
827 /// enum Rule {}
828 ///
829 /// let input = "ab";
830 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
831 /// let mut result = state.end_of_input();
832 /// assert!(result.is_err());
833 ///
834 /// state = pest::ParserState::new(input);
835 /// state = state.match_string("ab").unwrap();
836 /// result = state.end_of_input();
837 /// assert!(result.is_ok());
838 /// ```
839 #[inline]
840 pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
841 if self.position.at_end() {
842 Ok(self)
843 } else {
844 Err(self)
845 }
846 }
847
848 /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
849 /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
850 /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
851 /// together, negating the `Result`.
852 ///
853 /// # Examples
854 ///
855 /// ```
856 /// # use pest;
857 /// # #[allow(non_camel_case_types)]
858 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
859 /// enum Rule {
860 /// a
861 /// }
862 ///
863 /// let input = "a";
864 /// let pairs: Vec<_> = pest::state(input, |state| {
865 /// state.lookahead(true, |state| {
866 /// state.rule(Rule::a, |s| Ok(s))
867 /// })
868 /// }).unwrap().collect();
869 ///
870 /// assert_eq!(pairs.len(), 0);
871 /// ```
872 #[inline]
873 pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
874 where
875 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
876 {
877 self = self.inc_call_check_limit()?;
878 let initial_lookahead = self.lookahead;
879
880 self.lookahead = if is_positive {
881 match initial_lookahead {
882 Lookahead::None | Lookahead::Positive => Lookahead::Positive,
883 Lookahead::Negative => Lookahead::Negative,
884 }
885 } else {
886 match initial_lookahead {
887 Lookahead::None | Lookahead::Positive => Lookahead::Negative,
888 Lookahead::Negative => Lookahead::Positive,
889 }
890 };
891
892 let initial_pos = self.position;
893
894 let result = f(self.checkpoint());
895
896 let result_state = match result {
897 Ok(mut new_state) => {
898 new_state.position = initial_pos;
899 new_state.lookahead = initial_lookahead;
900 Ok(new_state.restore())
901 }
902 Err(mut new_state) => {
903 new_state.position = initial_pos;
904 new_state.lookahead = initial_lookahead;
905 Err(new_state.restore())
906 }
907 };
908
909 if is_positive {
910 result_state
911 } else {
912 match result_state {
913 Ok(state) => Err(state),
914 Err(state) => Ok(state),
915 }
916 }
917 }
918
919 /// Transformation which stops `Token`s from being generated according to `is_atomic`.
920 ///
921 /// # Examples
922 ///
923 /// ```
924 /// # use pest::{self, Atomicity};
925 /// # #[allow(non_camel_case_types)]
926 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
927 /// enum Rule {
928 /// a
929 /// }
930 ///
931 /// let input = "a";
932 /// let pairs: Vec<_> = pest::state(input, |state| {
933 /// state.atomic(Atomicity::Atomic, |s| {
934 /// s.rule(Rule::a, |s| Ok(s))
935 /// })
936 /// }).unwrap().collect();
937 ///
938 /// assert_eq!(pairs.len(), 0);
939 /// ```
940 #[inline]
941 pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
942 where
943 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
944 {
945 self = self.inc_call_check_limit()?;
946 let initial_atomicity = self.atomicity;
947 let should_toggle = self.atomicity != atomicity;
948
949 if should_toggle {
950 self.atomicity = atomicity;
951 }
952
953 let result = f(self);
954
955 match result {
956 Ok(mut new_state) => {
957 if should_toggle {
958 new_state.atomicity = initial_atomicity;
959 }
960 Ok(new_state)
961 }
962 Err(mut new_state) => {
963 if should_toggle {
964 new_state.atomicity = initial_atomicity;
965 }
966 Err(new_state)
967 }
968 }
969 }
970
971 /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
972 /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
973 /// called successfully, or `Err(Box<ParserState>)` otherwise.
974 ///
975 /// # Examples
976 ///
977 /// ```
978 /// # use pest;
979 /// # #[allow(non_camel_case_types)]
980 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
981 /// enum Rule {}
982 ///
983 /// let input = "ab";
984 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
985 /// let mut result = state.stack_push(|state| state.match_string("a"));
986 /// assert!(result.is_ok());
987 /// assert_eq!(result.unwrap().position().pos(), 1);
988 /// ```
989 #[inline]
990 pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
991 where
992 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
993 {
994 self = self.inc_call_check_limit()?;
995 let start = self.position;
996
997 let result = f(self);
998
999 match result {
1000 Ok(mut state) => {
1001 let end = state.position;
1002 state.stack.push(start.span(&end));
1003 Ok(state)
1004 }
1005 Err(state) => Err(state),
1006 }
1007 }
1008
1009 /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1010 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1011 ///
1012 /// # Examples
1013 ///
1014 /// ```
1015 /// # use pest;
1016 /// # #[allow(non_camel_case_types)]
1017 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1018 /// enum Rule {}
1019 ///
1020 /// let input = "aa";
1021 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1022 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1023 /// |state| state.stack_peek()
1024 /// );
1025 /// assert!(result.is_ok());
1026 /// assert_eq!(result.unwrap().position().pos(), 2);
1027 /// ```
1028 #[inline]
1029 pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1030 let string = self
1031 .stack
1032 .peek()
1033 .expect("peek was called on empty stack")
1034 .as_str();
1035 self.match_string(string)
1036 }
1037
1038 /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1039 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1040 ///
1041 /// # Examples
1042 ///
1043 /// ```
1044 /// # use pest;
1045 /// # #[allow(non_camel_case_types)]
1046 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1047 /// enum Rule {}
1048 ///
1049 /// let input = "aa";
1050 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1051 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1052 /// |state| state.stack_pop()
1053 /// );
1054 /// assert!(result.is_ok());
1055 /// assert_eq!(result.unwrap().position().pos(), 2);
1056 /// ```
1057 #[inline]
1058 pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1059 let string = self
1060 .stack
1061 .pop()
1062 .expect("pop was called on empty stack")
1063 .as_str();
1064 self.match_string(string)
1065 }
1066
1067 /// Matches part of the state of the stack.
1068 ///
1069 /// # Examples
1070 ///
1071 /// ```
1072 /// # use pest::{self, MatchDir};
1073 /// # #[allow(non_camel_case_types)]
1074 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1075 /// enum Rule {}
1076 ///
1077 /// let input = "abcd cd cb";
1078 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1079 /// let mut result = state
1080 /// .stack_push(|state| state.match_string("a"))
1081 /// .and_then(|state| state.stack_push(|state| state.match_string("b")))
1082 /// .and_then(|state| state.stack_push(|state| state.match_string("c")))
1083 /// .and_then(|state| state.stack_push(|state| state.match_string("d")))
1084 /// .and_then(|state| state.match_string(" "))
1085 /// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
1086 /// .and_then(|state| state.match_string(" "))
1087 /// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
1088 /// assert!(result.is_ok());
1089 /// assert_eq!(result.unwrap().position().pos(), 10);
1090 /// ```
1091 #[inline]
1092 pub fn stack_match_peek_slice(
1093 mut self: Box<Self>,
1094 start: i32,
1095 end: Option<i32>,
1096 match_dir: MatchDir,
1097 ) -> ParseResult<Box<Self>> {
1098 let range = match constrain_idxs(start, end, self.stack.len()) {
1099 Some(r) => r,
1100 None => return Err(self),
1101 };
1102 // return true if an empty sequence is requested
1103 if range.end <= range.start {
1104 return Ok(self);
1105 }
1106
1107 let mut position = self.position;
1108 let result = {
1109 let mut iter_b2t = self.stack[range].iter();
1110 let matcher = |span: &Span<'_>| position.match_string(span.as_str());
1111 match match_dir {
1112 MatchDir::BottomToTop => iter_b2t.all(matcher),
1113 MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
1114 }
1115 };
1116 if result {
1117 self.position = position;
1118 Ok(self)
1119 } else {
1120 Err(self)
1121 }
1122 }
1123
1124 /// Matches the full state of the stack.
1125 ///
1126 /// # Examples
1127 ///
1128 /// ```
1129 /// # use pest;
1130 /// # #[allow(non_camel_case_types)]
1131 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1132 /// enum Rule {}
1133 ///
1134 /// let input = "abba";
1135 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1136 /// let mut result = state
1137 /// .stack_push(|state| state.match_string("a"))
1138 /// .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1139 /// .and_then(|state| state.stack_match_peek());
1140 /// assert!(result.is_ok());
1141 /// assert_eq!(result.unwrap().position().pos(), 4);
1142 /// ```
1143 #[inline]
1144 pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1145 self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1146 }
1147
1148 /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1149 ///
1150 /// # Examples
1151 ///
1152 /// ```
1153 /// /// # use pest;
1154 /// # #[allow(non_camel_case_types)]
1155 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1156 /// enum Rule {}
1157 ///
1158 /// let input = "aaaa";
1159 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1160 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1161 /// state.stack_push(|state| state.match_string("a"))
1162 /// }).and_then(|state| state.stack_match_peek());
1163 /// assert!(result.is_ok());
1164 /// assert_eq!(result.unwrap().position().pos(), 4);
1165 /// ```
1166 #[inline]
1167 pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1168 let mut position = self.position;
1169 let mut result = true;
1170 while let Some(span) = self.stack.pop() {
1171 result = position.match_string(span.as_str());
1172 if !result {
1173 break;
1174 }
1175 }
1176
1177 if result {
1178 self.position = position;
1179 Ok(self)
1180 } else {
1181 Err(self)
1182 }
1183 }
1184
1185 /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1186 /// `Err(Box<ParserState>)` otherwise.
1187 ///
1188 /// # Examples
1189 ///
1190 /// ```
1191 /// # use pest;
1192 /// # #[allow(non_camel_case_types)]
1193 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1194 /// enum Rule {}
1195 ///
1196 /// let input = "aa";
1197 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1198 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1199 /// |state| state.stack_drop()
1200 /// );
1201 /// assert!(result.is_ok());
1202 /// assert_eq!(result.unwrap().position().pos(), 1);
1203 /// ```
1204 #[inline]
1205 pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1206 match self.stack.pop() {
1207 Some(_) => Ok(self),
1208 None => Err(self),
1209 }
1210 }
1211
1212 /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1213 /// this method only restores the stack.
1214 ///
1215 /// # Examples
1216 ///
1217 /// ```
1218 /// # use pest;
1219 /// # #[allow(non_camel_case_types)]
1220 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1221 /// enum Rule {}
1222 ///
1223 /// let input = "ab";
1224 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1225 /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1226 /// state.match_string("a")).and_then(|state| state.match_string("a"))
1227 /// );
1228 ///
1229 /// assert!(result.is_err());
1230 ///
1231 /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
1232 /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1233 /// assert!(catch_panic.is_err());
1234 /// ```
1235 #[inline]
1236 pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1237 where
1238 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1239 {
1240 match f(self.checkpoint()) {
1241 Ok(state) => Ok(state.checkpoint_ok()),
1242 Err(state) => Err(state.restore()),
1243 }
1244 }
1245
1246 // Mark the current state as a checkpoint and return the `Box`.
1247 #[inline]
1248 pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1249 self.stack.snapshot();
1250 self
1251 }
1252
1253 // The checkpoint was cleared successfully
1254 // so remove it without touching other stack state.
1255 #[inline]
1256 pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1257 self.stack.clear_snapshot();
1258 self
1259 }
1260
1261 // Restore the current state to the most recent checkpoint.
1262 #[inline]
1263 pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1264 self.stack.restore();
1265 self
1266 }
1267}
1268
1269fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1270 let start_norm: usize = normalize_index(i:start, len)?;
1271 let end_norm: usize = end.map_or(default:Some(len), |e: i32| normalize_index(i:e, len))?;
1272 Some(start_norm..end_norm)
1273}
1274
1275/// Normalizes the index using its sequence’s length.
1276/// Returns `None` if the normalized index is OOB.
1277fn normalize_index(i: i32, len: usize) -> Option<usize> {
1278 if i > len as i32 {
1279 None
1280 } else if i >= 0 {
1281 Some(i as usize)
1282 } else {
1283 let real_i: i32 = len as i32 + i;
1284 if real_i >= 0 {
1285 Some(real_i as usize)
1286 } else {
1287 None
1288 }
1289 }
1290}
1291
1292#[cfg(test)]
1293mod test {
1294 use super::*;
1295
1296 #[test]
1297 fn normalize_index_pos() {
1298 assert_eq!(normalize_index(4, 6), Some(4));
1299 assert_eq!(normalize_index(5, 5), Some(5));
1300 assert_eq!(normalize_index(6, 3), None);
1301 }
1302
1303 #[test]
1304 fn normalize_index_neg() {
1305 assert_eq!(normalize_index(-4, 6), Some(2));
1306 assert_eq!(normalize_index(-5, 5), Some(0));
1307 assert_eq!(normalize_index(-6, 3), None);
1308 }
1309}
1310