// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use alloc::rc::Rc;
use alloc::vec::Vec;
use core::fmt;
use core::str;

use super::queueable_token::QueueableToken;
use crate::position;
use crate::token::Token;
use crate::RuleType;

/// An iterator over [`Token`]s. It is created by [`Pair::tokens`] and [`Pairs::tokens`].
///
/// [`Token`]: ../enum.Token.html
/// [`Pair::tokens`]: struct.Pair.html#method.tokens
/// [`Pairs::tokens`]: struct.Pairs.html#method.tokens
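///
/// A minimal usage sketch (`MyParser` and `Rule::sum` are placeholder names for a
/// pest-generated parser, which is why the example is marked `ignore`):
///
/// ```ignore
/// use pest::Parser;
///
/// let pairs = MyParser::parse(Rule::sum, "1 + 2").unwrap();
/// for token in pairs.tokens() {
///     // Each item is a `Token::Start` or `Token::End` carrying a rule and a position.
///     println!("{:?}", token);
/// }
/// ```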
#[derive(Clone)]
pub struct Tokens<'i, R> {
    /// # Safety:
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
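    ///
    /// For example, with `input = "我很漂亮e"` the valid boundaries are 0, 3, 6,
    /// 9, 12, and 13; an index such as 1 or 2 falls inside the multi-byte `'我'`
    /// and would make the `Position::new_unchecked` calls below unsound.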
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    start: usize,
    end: usize,
}

// TODO(safety): QueueableTokens must be valid indices into input.
pub fn new<'i, R: RuleType>(
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    start: usize,
    end: usize,
) -> Tokens<'i, R> {
    if cfg!(debug_assertions) {
        // In debug builds, check the safety invariant up front: `str::get`
        // returns `None` when the index is out of bounds or not a character
        // boundary, so every recorded position must pass this test.
        for tok in queue.iter() {
            match *tok {
                QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
                    assert!(
                        input.get(input_pos..).is_some(),
                        "💥 UNSAFE `Tokens` CREATED 💥"
                    )
                }
            }
        }
    }

    Tokens {
        queue,
        input,
        start,
        end,
    }
}

impl<'i, R: RuleType> Tokens<'i, R> {
    fn create_token(&self, index: usize) -> Token<'i, R> {
        match self.queue[index] {
            QueueableToken::Start {
                end_token_index,
                input_pos,
            } => {
                // A `Start` entry does not store its rule; read it from the
                // paired `End` entry at `end_token_index`.
                let rule = match self.queue[end_token_index] {
                    QueueableToken::End { rule, .. } => rule,
                    _ => unreachable!(),
                };

                Token::Start {
                    rule,
                    // QueueableTokens are safely created.
                    pos: unsafe { position::Position::new_unchecked(self.input, input_pos) },
                }
            }
            QueueableToken::End {
                rule, input_pos, ..
            } => {
                Token::End {
                    rule,
                    // QueueableTokens are safely created.
                    pos: unsafe { position::Position::new_unchecked(self.input, input_pos) },
                }
            }
        }
    }
}

impl<'i, R: RuleType> ExactSizeIterator for Tokens<'i, R> {
    fn len(&self) -> usize {
        self.end - self.start
    }
}

impl<'i, R: RuleType> Iterator for Tokens<'i, R> {
    type Item = Token<'i, R>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.start >= self.end {
            return None;
        }

        let token = self.create_token(self.start);

        self.start += 1;

        Some(token)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = <Self as ExactSizeIterator>::len(self);
        (len, Some(len))
    }
}

impl<'i, R: RuleType> DoubleEndedIterator for Tokens<'i, R> {
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.end <= self.start {
            return None;
        }

        let token = self.create_token(self.end - 1);

        self.end -= 1;

        Some(token)
    }
}

impl<'i, R: RuleType> fmt::Debug for Tokens<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_list().entries(self.clone()).finish()
    }
}
#[cfg(test)]
mod tests {
    use super::super::super::macros::tests::*;
    use super::super::super::Parser;
    use super::Token;
    use alloc::vec::Vec;

    #[test]
    fn double_ended_iter_for_tokens() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();
        let mut tokens = pairs.clone().tokens().collect::<Vec<Token<'_, Rule>>>();
        tokens.reverse();
        let reverse_tokens = pairs.tokens().rev().collect::<Vec<Token<'_, Rule>>>();
        assert_eq!(tokens, reverse_tokens);
    }

    #[test]
    fn exact_size_iter_for_tokens() {
        let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens();
        assert_eq!(tokens.len(), tokens.count());

        let tokens = AbcParser::parse(Rule::a, "我很漂亮e").unwrap().tokens();
        assert_eq!(tokens.len(), tokens.count());

        let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev();
        assert_eq!(tokens.len(), tokens.count());

        let mut tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens();
        let tokens_len = tokens.len();
        let _ = tokens.next().unwrap();
        assert_eq!(tokens.count() + 1, tokens_len);
    }
}