1// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::format;
11use alloc::rc::Rc;
12use alloc::string::String;
13use alloc::vec::Vec;
14use core::fmt;
15use core::hash::{Hash, Hasher};
16use core::iter::Filter;
17use core::ptr;
18use core::str;
19
20#[cfg(feature = "pretty-print")]
21use serde::ser::SerializeStruct;
22
23use super::flat_pairs::{self, FlatPairs};
24use super::line_index::LineIndex;
25use super::pair::{self, Pair};
26use super::queueable_token::QueueableToken;
27use super::tokens::{self, Tokens};
28use crate::RuleType;
29
/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// [`Pair`]: struct.Pair.html
/// [`pest::state`]: ../fn.state.html
/// [`Pair::into_inner`]: struct.Pair.html#method.into_inner
#[derive(Clone)]
pub struct Pairs<'i, R> {
    /// Shared token queue that `start` and `end` index into.
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    /// The input string the token positions refer to.
    input: &'i str,
    /// Queue index of the first token still covered by this iterator.
    start: usize,
    /// Queue index one past the last token still covered by this iterator.
    end: usize,
    /// Number of top-level pairs left to yield; reported by `ExactSizeIterator::len`.
    pairs_count: usize,
    /// Shared line index handed to every `Pair` created from this iterator.
    line_index: Rc<LineIndex>,
}
44
45pub fn new<'i, R: RuleType>(
46 queue: Rc<Vec<QueueableToken<'i, R>>>,
47 input: &'i str,
48 line_index: Option<Rc<LineIndex>>,
49 start: usize,
50 end: usize,
51) -> Pairs<'i, R> {
52 let line_index = match line_index {
53 Some(line_index) => line_index,
54 None => Rc::new(LineIndex::new(input)),
55 };
56
57 let mut pairs_count = 0;
58 let mut cursor = start;
59 while cursor < end {
60 cursor = match queue[cursor] {
61 QueueableToken::Start {
62 end_token_index, ..
63 } => end_token_index,
64 _ => unreachable!(),
65 } + 1;
66 pairs_count += 1;
67 }
68
69 Pairs {
70 queue,
71 input,
72 start,
73 end,
74 pairs_count,
75 line_index,
76 }
77}
78
79impl<'i, R: RuleType> Pairs<'i, R> {
80 /// Captures a slice from the `&str` defined by the starting position of the first token `Pair`
81 /// and the ending position of the last token `Pair` of the `Pairs`. This also captures
82 /// the input between those two token `Pair`s.
83 ///
84 /// # Examples
85 ///
86 /// ```
87 /// # use std::rc::Rc;
88 /// # use pest;
89 /// # #[allow(non_camel_case_types)]
90 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
91 /// enum Rule {
92 /// a,
93 /// b
94 /// }
95 ///
96 /// let input = "a b";
97 /// let pairs = pest::state(input, |state| {
98 /// // generating Token pairs with Rule::a and Rule::b ...
99 /// # state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1))
100 /// # .and_then(|s| s.rule(Rule::b, |s| s.match_string("b")))
101 /// }).unwrap();
102 ///
103 /// assert_eq!(pairs.as_str(), "a b");
104 /// ```
105 #[inline]
106 pub fn as_str(&self) -> &'i str {
107 if self.start < self.end {
108 let start = self.pos(self.start);
109 let end = self.pos(self.end - 1);
110 // Generated positions always come from Positions and are UTF-8 borders.
111 &self.input[start..end]
112 } else {
113 ""
114 }
115 }
116
117 /// Captures inner token `Pair`s and concatenates resulting `&str`s. This does not capture
118 /// the input between token `Pair`s.
119 ///
120 /// # Examples
121 ///
122 /// ```
123 /// # use std::rc::Rc;
124 /// # use pest;
125 /// # #[allow(non_camel_case_types)]
126 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
127 /// enum Rule {
128 /// a,
129 /// b
130 /// }
131 ///
132 /// let input = "a b";
133 /// let pairs = pest::state(input, |state| {
134 /// // generating Token pairs with Rule::a and Rule::b ...
135 /// # state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1))
136 /// # .and_then(|s| s.rule(Rule::b, |s| s.match_string("b")))
137 /// }).unwrap();
138 ///
139 /// assert_eq!(pairs.concat(), "ab");
140 /// ```
141 #[inline]
142 pub fn concat(&self) -> String {
143 self.clone()
144 .fold(String::new(), |string, pair| string + pair.as_str())
145 }
146
147 /// Flattens the `Pairs`.
148 ///
149 /// # Examples
150 ///
151 /// ```
152 /// # use std::rc::Rc;
153 /// # use pest;
154 /// # #[allow(non_camel_case_types)]
155 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
156 /// enum Rule {
157 /// a,
158 /// b
159 /// }
160 ///
161 /// let input = "";
162 /// let pairs = pest::state(input, |state| {
163 /// // generating nested Token pair with Rule::b inside Rule::a
164 /// # state.rule(Rule::a, |state| {
165 /// # state.rule(Rule::b, |s| Ok(s))
166 /// # })
167 /// }).unwrap();
168 /// let tokens: Vec<_> = pairs.flatten().tokens().collect();
169 ///
170 /// assert_eq!(tokens.len(), 4);
171 /// ```
172 #[inline]
173 pub fn flatten(self) -> FlatPairs<'i, R> {
174 unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) }
175 }
176
177 /// Finds the first pair that has its node or branch tagged with the provided
178 /// label.
179 ///
180 /// # Examples
181 ///
182 /// Try to recognize the branch between add and mul
183 /// ```
184 /// use pest::{state, ParseResult, ParserState};
185 /// #[allow(non_camel_case_types)]
186 /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
187 /// enum Rule {
188 /// number, // 0..9
189 /// add, // num + num
190 /// mul, // num * num
191 /// }
192 /// fn mark_branch(
193 /// state: Box<ParserState<'_, Rule>>,
194 /// ) -> ParseResult<Box<ParserState<'_, Rule>>> {
195 /// expr(state, Rule::mul, "*")
196 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("mul")))
197 /// .or_else(|state| expr(state, Rule::add, "+"))
198 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("add")))
199 /// }
200 /// fn expr<'a>(
201 /// state: Box<ParserState<'a, Rule>>,
202 /// r: Rule,
203 /// o: &'static str,
204 /// ) -> ParseResult<Box<ParserState<'a, Rule>>> {
205 /// state.rule(r, |state| {
206 /// state.sequence(|state| {
207 /// number(state)
208 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("lhs")))
209 /// .and_then(|state| state.match_string(o))
210 /// .and_then(number)
211 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("rhs")))
212 /// })
213 /// })
214 /// }
215 /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
216 /// state.rule(Rule::number, |state| state.match_range('0'..'9'))
217 /// }
218 /// let input = "1+2";
219 /// let pairs = state(input, mark_branch).unwrap();
220 /// assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add);
221 /// assert_eq!(pairs.find_first_tagged("mul"), None);
222 /// ```
223 #[inline]
224 pub fn find_first_tagged(&self, tag: &'i str) -> Option<Pair<'i, R>> {
225 self.clone().find_tagged(tag).next()
226 }
227
228 /// Returns the iterator over pairs that have their node or branch tagged
229 /// with the provided label.
230 ///
231 /// # Examples
232 ///
233 /// Try to recognize the node between left and right hand side
234 /// ```
235 /// use pest::{state, ParseResult, ParserState};
236 /// #[allow(non_camel_case_types)]
237 /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
238 /// enum Rule {
239 /// number, // 0..9
240 /// add, // num + num
241 /// mul, // num * num
242 /// }
243 /// fn mark_branch(
244 /// state: Box<ParserState<'_, Rule>>,
245 /// ) -> ParseResult<Box<ParserState<'_, Rule>>> {
246 /// expr(state, Rule::mul, "*")
247 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("mul")))
248 /// .or_else(|state| expr(state, Rule::add, "+"))
249 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("add")))
250 /// }
251 /// fn expr<'a>(
252 /// state: Box<ParserState<'a, Rule>>,
253 /// r: Rule,
254 /// o: &'static str,
255 /// ) -> ParseResult<Box<ParserState<'a, Rule>>> {
256 /// state.rule(r, |state| {
257 /// state.sequence(|state| {
258 /// number(state)
259 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("lhs")))
260 /// .and_then(|state| state.match_string(o))
261 /// .and_then(number)
262 /// .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("rhs")))
263 /// })
264 /// })
265 /// }
266 /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
267 /// state.rule(Rule::number, |state| state.match_range('0'..'9'))
268 /// }
269 ///
270 /// let input = "1+2";
271 /// let pairs = state(input, mark_branch).unwrap();
272 /// let mut left_numbers = pairs.find_tagged("lhs");
273 /// assert_eq!(left_numbers.next().unwrap().as_str(), "1");
274 /// assert_eq!(left_numbers.next(), None);
275 /// ```
276 #[inline]
277 pub fn find_tagged(
278 self,
279 tag: &'i str,
280 ) -> Filter<FlatPairs<'i, R>, impl FnMut(&Pair<'i, R>) -> bool + '_> {
281 self.flatten()
282 .filter(move |pair: &Pair<'i, R>| matches!(pair.as_node_tag(), Some(nt) if nt == tag))
283 }
284
285 /// Returns the `Tokens` for the `Pairs`.
286 ///
287 /// # Examples
288 ///
289 /// ```
290 /// # use std::rc::Rc;
291 /// # use pest;
292 /// # #[allow(non_camel_case_types)]
293 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
294 /// enum Rule {
295 /// a
296 /// }
297 ///
298 /// let input = "";
299 /// let pairs = pest::state(input, |state| {
300 /// // generating Token pair with Rule::a ...
301 /// # state.rule(Rule::a, |s| Ok(s))
302 /// }).unwrap();
303 /// let tokens: Vec<_> = pairs.tokens().collect();
304 ///
305 /// assert_eq!(tokens.len(), 2);
306 /// ```
307 #[inline]
308 pub fn tokens(self) -> Tokens<'i, R> {
309 tokens::new(self.queue, self.input, self.start, self.end)
310 }
311
312 /// Peek at the first inner `Pair` without changing the position of this iterator.
313 #[inline]
314 pub fn peek(&self) -> Option<Pair<'i, R>> {
315 if self.start < self.end {
316 Some(unsafe {
317 pair::new(
318 Rc::clone(&self.queue),
319 self.input,
320 Rc::clone(&self.line_index),
321 self.start,
322 )
323 })
324 } else {
325 None
326 }
327 }
328
329 /// Generates a string that stores the lexical information of `self` in
330 /// a pretty-printed JSON format.
331 #[cfg(feature = "pretty-print")]
332 pub fn to_json(&self) -> String {
333 ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pairs to json.")
334 }
335
336 fn pair(&self) -> usize {
337 match self.queue[self.start] {
338 QueueableToken::Start {
339 end_token_index, ..
340 } => end_token_index,
341 _ => unreachable!(),
342 }
343 }
344
345 fn pair_from_end(&self) -> usize {
346 match self.queue[self.end - 1] {
347 QueueableToken::End {
348 start_token_index, ..
349 } => start_token_index,
350 _ => unreachable!(),
351 }
352 }
353
354 fn pos(&self, index: usize) -> usize {
355 match self.queue[index] {
356 QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
357 input_pos
358 }
359 }
360 }
361}
362
impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> {
    /// Returns the number of top-level pairs that have not been yielded yet.
    #[inline]
    fn len(&self) -> usize {
        // Kept up to date by `next` and `next_back`, so no queue walk is needed here.
        self.pairs_count
    }
}
369
370impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
371 type Item = Pair<'i, R>;
372
373 fn next(&mut self) -> Option<Self::Item> {
374 let pair: Pair<'_, R> = self.peek()?;
375
376 self.start = self.pair() + 1;
377 self.pairs_count -= 1;
378 Some(pair)
379 }
380
381 fn size_hint(&self) -> (usize, Option<usize>) {
382 let len: usize = <Self as ExactSizeIterator>::len(self);
383 (len, Some(len))
384 }
385}
386
387impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
388 fn next_back(&mut self) -> Option<Self::Item> {
389 if self.end <= self.start {
390 return None;
391 }
392
393 self.end = self.pair_from_end();
394 self.pairs_count -= 1;
395
396 let pair: Pair<'_, R> = unsafe {
397 pair::new(
398 queue:Rc::clone(&self.queue),
399 self.input,
400 line_index:Rc::clone(&self.line_index),
401 self.end,
402 )
403 };
404
405 Some(pair)
406 }
407}
408
409impl<'i, R: RuleType> fmt::Debug for Pairs<'i, R> {
410 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
411 f.debug_list().entries(self.clone()).finish()
412 }
413}
414
415impl<'i, R: RuleType> fmt::Display for Pairs<'i, R> {
416 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
417 write!(
418 f,
419 "[{}]",
420 self.clone()
421 .map(|pair| format!("{}", pair))
422 .collect::<Vec<_>>()
423 .join(", ")
424 )
425 }
426}
427
428impl<'i, R: PartialEq> PartialEq for Pairs<'i, R> {
429 fn eq(&self, other: &Pairs<'i, R>) -> bool {
430 Rc::ptr_eq(&self.queue, &other.queue)
431 && ptr::eq(self.input, b:other.input)
432 && self.start == other.start
433 && self.end == other.end
434 }
435}
436
// Marker impl: declares the `PartialEq` relation on `Pairs` to be a full equivalence.
impl<'i, R: Eq> Eq for Pairs<'i, R> {}
438
439impl<'i, R: Hash> Hash for Pairs<'i, R> {
440 fn hash<H: Hasher>(&self, state: &mut H) {
441 (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
442 (self.input as *const str).hash(state);
443 self.start.hash(state);
444 self.end.hash(state);
445 }
446}
447
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pairs<'i, R> {
    /// Serializes `self` as a struct named `"Pairs"` with two fields: `"pos"`, the
    /// `(start, end)` input positions covered by the remaining tokens, and `"pairs"`, the
    /// remaining top-level pairs.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        // NOTE(review): unlike `as_str`, nothing here guards against an empty range
        // (`start == end`); `self.end - 1` and the queue lookups in `pos` are then performed
        // on indices outside the range. Confirm callers never serialize an empty `Pairs`.
        let span = (self.pos(self.start), self.pos(self.end - 1));
        let children: Vec<_> = self.clone().collect();

        let mut state = serializer.serialize_struct("Pairs", 2)?;
        state.serialize_field("pos", &span)?;
        state.serialize_field("pairs", &children)?;
        state.end()
    }
}
464
#[cfg(test)]
mod tests {
    use super::super::super::macros::tests::*;
    use super::super::super::Parser;
    use alloc::borrow::ToOwned;
    use alloc::boxed::Box;
    use alloc::format;
    use alloc::vec;
    use alloc::vec::Vec;

    // The expected text must match `serde_json::to_string_pretty` byte for byte, which
    // indents nested values by two spaces per level.
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        let expected = r#"{
  "pos": [
    0,
    5
  ],
  "pairs": [
    {
      "pos": [
        0,
        3
      ],
      "rule": "a",
      "inner": {
        "pos": [
          1,
          2
        ],
        "pairs": [
          {
            "pos": [
              1,
              2
            ],
            "rule": "b",
            "inner": "b"
          }
        ]
      }
    },
    {
      "pos": [
        4,
        5
      ],
      "rule": "c",
      "inner": "e"
    }
  ]
}"#;

        assert_eq!(expected, pairs.to_json());
    }

    // `as_str` spans from the first pair's start to the last pair's end, gaps included.
    #[test]
    fn as_str() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        assert_eq!(pairs.as_str(), "abcde");
    }

    // The inner pairs of a pair without children form an empty `Pairs`.
    #[test]
    fn as_str_empty() {
        let mut pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        assert_eq!(pairs.nth(1).unwrap().into_inner().as_str(), "");
    }

    // `concat` joins only the text of the top-level pairs, skipping the gap between them.
    #[test]
    fn concat() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        assert_eq!(pairs.concat(), "abce");
    }

    #[test]
    fn pairs_debug() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        #[rustfmt::skip]
        assert_eq!(
            format!("{:?}", pairs),
            "[\
                Pair { rule: a, span: Span { str: \"abc\", start: 0, end: 3 }, inner: [\
                    Pair { rule: b, span: Span { str: \"b\", start: 1, end: 2 }, inner: [] }\
                ] }, \
                Pair { rule: c, span: Span { str: \"e\", start: 4, end: 5 }, inner: [] }\
            ]"
            .to_owned()
        );
    }

    #[test]
    fn pairs_display() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        assert_eq!(
            format!("{}", pairs),
            "[a(0, 3, [b(1, 2)]), c(4, 5)]".to_owned()
        );
    }

    // Forward iteration yields the top-level pairs in input order.
    #[test]
    fn iter_for_pairs() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();
        assert_eq!(
            pairs.map(|p| p.as_rule()).collect::<Vec<Rule>>(),
            vec![Rule::a, Rule::c]
        );
    }

    // Reverse iteration yields the same top-level pairs, last one first.
    #[test]
    fn double_ended_iter_for_pairs() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();
        assert_eq!(
            pairs.rev().map(|p| p.as_rule()).collect::<Vec<Rule>>(),
            vec![Rule::c, Rule::a]
        );
    }

    #[test]
    fn test_line_col() {
        let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), "abc");
        assert_eq!(pair.line_col(), (1, 1));

        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), "e");
        assert_eq!(pair.line_col(), (2, 1));

        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), "fgh");
        assert_eq!(pair.line_col(), (2, 2));
    }

    // Pairs produced by `next_back` must report the same line/column as in forward order.
    #[test]
    fn test_rev_iter_line_col() {
        let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev();
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), "fgh");
        assert_eq!(pair.line_col(), (2, 2));

        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), "e");
        assert_eq!(pair.line_col(), (2, 1));

        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), "abc");
        assert_eq!(pair.line_col(), (1, 1));
    }

    #[test]
    fn test_tag_node_branch() {
        use crate::{state, ParseResult, ParserState};
        #[allow(non_camel_case_types)]
        #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        enum Rule {
            number, // 0..9
            add,    // num + num
            mul,    // num * num
        }
        fn mark_branch(
            state: Box<ParserState<'_, Rule>>,
        ) -> ParseResult<Box<ParserState<'_, Rule>>> {
            expr(state, Rule::mul, "*")
                .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("mul")))
                .or_else(|state| expr(state, Rule::add, "+"))
                .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("add")))
        }
        fn expr<'a>(
            state: Box<ParserState<'a, Rule>>,
            r: Rule,
            o: &'static str,
        ) -> ParseResult<Box<ParserState<'a, Rule>>> {
            state.rule(r, |state| {
                state.sequence(|state| {
                    number(state)
                        .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("lhs")))
                        .and_then(|state| state.match_string(o))
                        .and_then(number)
                        .and_then(|state| state.tag_node(alloc::borrow::Cow::Borrowed("rhs")))
                })
            })
        }
        fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
            state.rule(Rule::number, |state| state.match_range('0'..'9'))
        }
        let input = "1+2";
        let pairs = state(input, mark_branch).unwrap();
        assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add);
        assert_eq!(pairs.find_first_tagged("mul"), None);

        let mut left_numbers = pairs.clone().find_tagged("lhs");

        assert_eq!(left_numbers.next().unwrap().as_str(), "1");
        assert_eq!(left_numbers.next(), None);
        let mut right_numbers = pairs.find_tagged("rhs");

        assert_eq!(right_numbers.next().unwrap().as_str(), "2");
        assert_eq!(right_numbers.next(), None);
    }

    // `len` must agree with the number of pairs actually yielded, from either end and after
    // partial consumption.
    #[test]
    fn exact_size_iter_for_pairs() {
        let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
        assert_eq!(pairs.len(), pairs.count());

        let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev();
        assert_eq!(pairs.len(), pairs.count());

        let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
        let pairs_len = pairs.len();
        let _ = pairs.next().unwrap();
        assert_eq!(pairs.count() + 1, pairs_len);
    }
}
686