1 | // pest. The Elegant Parser |
2 | // Copyright (c) 2018 Dragoș Tiselice |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. All files in the project carrying such notice may not be copied, |
8 | // modified, or distributed except according to those terms. |
9 | |
10 | //! The core functionality of parsing grammar. |
11 | //! State of parser during the process of rules handling. |
12 | |
13 | use alloc::borrow::ToOwned; |
14 | use alloc::boxed::Box; |
15 | use alloc::collections::BTreeSet; |
16 | use alloc::rc::Rc; |
17 | use alloc::string::String; |
18 | use alloc::vec; |
19 | use alloc::vec::Vec; |
20 | use core::fmt::{Debug, Display, Formatter}; |
21 | use core::num::NonZeroUsize; |
22 | use core::ops::Range; |
23 | use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; |
24 | |
25 | use crate::error::{Error, ErrorVariant}; |
26 | use crate::iterators::pairs::new; |
27 | use crate::iterators::{pairs, QueueableToken}; |
28 | use crate::position::Position; |
29 | use crate::span::Span; |
30 | use crate::stack::Stack; |
31 | use crate::RuleType; |
32 | |
33 | /// The current lookahead status of a [`ParserState`]. |
34 | /// |
35 | /// [`ParserState`]: struct.ParserState.html |
36 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
37 | pub enum Lookahead { |
38 | /// The positive predicate, written as an ampersand &, |
39 | /// attempts to match its inner expression. |
40 | /// If the inner expression succeeds, parsing continues, |
41 | /// but at the same position as the predicate — |
42 | /// &foo ~ bar is thus a kind of "AND" statement: |
43 | /// "the input string must match foo AND bar". |
44 | /// If the inner expression fails, |
45 | /// the whole expression fails too. |
46 | Positive, |
47 | /// The negative predicate, written as an exclamation mark !, |
48 | /// attempts to match its inner expression. |
49 | /// If the inner expression fails, the predicate succeeds |
50 | /// and parsing continues at the same position as the predicate. |
51 | /// If the inner expression succeeds, the predicate fails — |
52 | /// !foo ~ bar is thus a kind of "NOT" statement: |
53 | /// "the input string must match bar but NOT foo". |
54 | Negative, |
55 | /// No lookahead (i.e. it will consume input). |
56 | None, |
57 | } |
58 | |
59 | /// The current atomicity of a [`ParserState`]. |
60 | /// |
61 | /// [`ParserState`]: struct.ParserState.html |
62 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
63 | pub enum Atomicity { |
64 | /// prevents implicit whitespace: inside an atomic rule, |
65 | /// the tilde ~ means "immediately followed by", |
66 | /// and repetition operators (asterisk * and plus sign +) |
67 | /// have no implicit separation. In addition, all other rules |
68 | /// called from an atomic rule are also treated as atomic. |
69 | /// (interior matching rules are silent) |
70 | Atomic, |
71 | /// The same as atomic, but inner tokens are produced as normal. |
72 | CompoundAtomic, |
73 | /// implicit whitespace is enabled |
74 | NonAtomic, |
75 | } |
76 | |
77 | /// Type alias to simplify specifying the return value of chained closures. |
78 | pub type ParseResult<S> = Result<S, S>; |
79 | |
80 | /// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`. |
81 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
82 | pub enum MatchDir { |
83 | /// from the bottom to the top of the stack |
84 | BottomToTop, |
85 | /// from the top to the bottom of the stack |
86 | TopToBottom, |
87 | } |
88 | |
89 | static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0); |
90 | |
91 | /// Sets the maximum call limit for the parser state |
92 | /// to prevent stack overflows or excessive execution times |
93 | /// in some grammars. |
94 | /// If set, the calls are tracked as a running total |
95 | /// over all non-terminal rules that can nest closures |
96 | /// (which are passed to transform the parser state). |
97 | /// |
98 | /// # Arguments |
99 | /// |
100 | /// * `limit` - The maximum number of calls. If None, |
101 | /// the number of calls is unlimited. |
102 | pub fn set_call_limit(limit: Option<NonZeroUsize>) { |
103 | CALL_LIMIT.store(val:limit.map(|f| f.get()).unwrap_or(0), order:Ordering::Relaxed); |
104 | } |
105 | |
106 | static ERROR_DETAIL: AtomicBool = AtomicBool::new(false); |
107 | |
108 | /// Sets whether information for more error details |
109 | /// should be collected. This is useful for debugging |
110 | /// parser errors (as it leads to more comprehensive |
111 | /// error messages), but it has a higher performance cost. |
112 | /// (hence, it's off by default) |
113 | /// |
114 | /// # Arguments |
115 | /// |
116 | /// * `enabled` - Whether to enable the collection for |
117 | /// more error details. |
118 | pub fn set_error_detail(enabled: bool) { |
119 | ERROR_DETAIL.store(val:enabled, order:Ordering::Relaxed); |
120 | } |
121 | |
122 | #[derive (Debug)] |
123 | struct CallLimitTracker { |
124 | current_call_limit: Option<(usize, usize)>, |
125 | } |
126 | |
127 | impl Default for CallLimitTracker { |
128 | fn default() -> Self { |
129 | let limit: usize = CALL_LIMIT.load(order:Ordering::Relaxed); |
130 | let current_call_limit: Option<(usize, usize)> = if limit > 0 { Some((0, limit)) } else { None }; |
131 | Self { current_call_limit } |
132 | } |
133 | } |
134 | |
135 | impl CallLimitTracker { |
136 | fn limit_reached(&self) -> bool { |
137 | self.current_call_limit |
138 | .map_or(default:false, |(current: usize, limit: usize)| current >= limit) |
139 | } |
140 | |
141 | fn increment_depth(&mut self) { |
142 | if let Some((current: &mut usize, _)) = &mut self.current_call_limit { |
143 | *current += 1; |
144 | } |
145 | } |
146 | } |
147 | |
148 | /// Number of call stacks that may result from a sequence of rules parsing. |
149 | const CALL_STACK_INITIAL_CAPACITY: usize = 20; |
150 | /// Max (un)expected number of tokens that we may see on the parsing error position. |
151 | const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30; |
152 | /// Max rule children number for which we'll extend calls stacks. |
153 | /// |
154 | /// In case rule we're working with has too many children rules that failed in parsing, |
155 | /// we don't want to store long stacks for all of them. If rule has more than this number |
156 | /// of failed children, they all will be collapsed in a parent rule. |
157 | const CALL_STACK_CHILDREN_THRESHOLD: usize = 4; |
158 | |
159 | /// Structure tracking errored parsing call (associated with specific `ParserState` function). |
160 | #[derive (Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)] |
161 | pub enum ParseAttempt<R> { |
162 | /// Call of `rule` errored. |
163 | Rule(R), |
164 | /// Call of token element (e.g., `match_string` or `match_insensitive`) errored. |
165 | /// Works as indicator of that leaf node is not a rule. In order to get the token value we |
166 | /// can address `ParseAttempts` `(un)expected_tokens`. |
167 | Token, |
168 | } |
169 | |
170 | impl<R> ParseAttempt<R> { |
171 | pub fn get_rule(&self) -> Option<&R> { |
172 | match self { |
173 | ParseAttempt::Rule(r: &R) => Some(r), |
174 | ParseAttempt::Token => None, |
175 | } |
176 | } |
177 | } |
178 | |
179 | /// Rules call stack. |
180 | /// Contains sequence of rule calls that resulted in new parsing attempt. |
181 | #[derive (Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] |
182 | pub struct RulesCallStack<R> { |
183 | /// Deepest rule caused a parsing error (ParseAttempt::Token transformed into a rule). |
184 | pub deepest: ParseAttempt<R>, |
185 | /// Most top rule covering `deepest`. |
186 | pub parent: Option<R>, |
187 | } |
188 | |
189 | impl<R> RulesCallStack<R> { |
190 | fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> { |
191 | RulesCallStack { |
192 | deepest, |
193 | parent: None, |
194 | } |
195 | } |
196 | } |
197 | |
198 | #[derive (Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] |
199 | pub enum ParsingToken { |
200 | Sensitive { token: String }, |
201 | Insensitive { token: String }, |
202 | Range { start: char, end: char }, |
203 | BuiltInRule, |
204 | } |
205 | |
206 | impl Display for ParsingToken { |
207 | fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { |
208 | match self { |
209 | ParsingToken::Sensitive { token: &String } => write!(f, " {token}" ), |
210 | ParsingToken::Insensitive { token: &String } => write!(f, " {}" , token.to_uppercase()), |
211 | ParsingToken::Range { start: &char, end: &char } => write!(f, " {start}.. {end}" ), |
212 | ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE" ), |
213 | } |
214 | } |
215 | } |
216 | |
217 | /// Structure that tracks all the parsing attempts made on the max position. |
218 | /// We want to give an error hint about parsing rules that succeeded |
219 | /// at the farthest input position. |
220 | /// The intuition is such rules will be most likely the query user initially wanted to write. |
221 | #[derive (Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] |
222 | pub struct ParseAttempts<R> { |
223 | /// Indicates whether the parsing attempts are tracked. |
224 | enabled: bool, |
225 | /// Vec of rule calls sequences awaiting tokens at the same `max_position`. |
226 | /// If there are several stacks in vec, it means all those rule stacks are "equal" |
227 | /// because their attempts occurred on the same position. |
228 | pub call_stacks: Vec<RulesCallStack<R>>, |
229 | /// Tokens that could be putted at `max_position` |
230 | /// in order to get a valid grammar query. |
231 | expected_tokens: Vec<ParsingToken>, |
232 | /// Tokens that we've prohibited to be putted at `max_position` |
233 | /// in order to get a valid grammar query. |
234 | unexpected_tokens: Vec<ParsingToken>, |
235 | /// Max position at which we were expecting to see one of `expected_tokens`. |
236 | pub max_position: usize, |
237 | } |
238 | |
239 | impl<R: RuleType> ParseAttempts<R> { |
240 | /// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens` |
241 | /// initialized with capacity. |
242 | pub fn new() -> Self { |
243 | Self { |
244 | call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY), |
245 | expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY), |
246 | unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY), |
247 | max_position: 0, |
248 | enabled: ERROR_DETAIL.load(Ordering::Relaxed), |
249 | } |
250 | } |
251 | |
252 | /// Get number of currently present call stacks. |
253 | fn call_stacks_number(&self) -> usize { |
254 | self.call_stacks.len() |
255 | } |
256 | |
257 | pub fn expected_tokens(&self) -> Vec<ParsingToken> { |
258 | self.expected_tokens |
259 | .iter() |
260 | .cloned() |
261 | .collect::<BTreeSet<_>>() |
262 | .into_iter() |
263 | .collect() |
264 | } |
265 | |
266 | pub fn unexpected_tokens(&self) -> Vec<ParsingToken> { |
267 | self.unexpected_tokens |
268 | .iter() |
269 | .cloned() |
270 | .collect::<BTreeSet<_>>() |
271 | .into_iter() |
272 | .collect() |
273 | } |
274 | |
275 | /// Retrieve call stacks. |
276 | pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> { |
277 | self.call_stacks |
278 | .iter() |
279 | .cloned() |
280 | .collect::<BTreeSet<_>>() |
281 | .into_iter() |
282 | .collect() |
283 | } |
284 | |
285 | /// In case we've tried to parse a rule, which start position is bigger than previous |
286 | /// `max_position` it means that we've advanced in our parsing and found better candidate. |
287 | /// |
288 | /// `start_index` is: |
289 | /// * Number of call stacks present in state at the moment current `rule` was called. The idea |
290 | /// is that we'd like to update only those stacks that originated from the current `rule` and |
291 | /// not from those that were called previously. |
292 | /// * 0 in case we've successfully parsed some token since the moment `rule` was called. |
293 | fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) { |
294 | let mut non_token_call_stacks = Vec::new(); |
295 | let mut token_call_stack_met = false; |
296 | for call_stack in self.call_stacks.iter().skip(start_index) { |
297 | if matches!(call_stack.deepest, ParseAttempt::Token) { |
298 | token_call_stack_met = true; |
299 | } else { |
300 | non_token_call_stacks.push(call_stack.clone()) |
301 | } |
302 | } |
303 | if token_call_stack_met && non_token_call_stacks.is_empty() { |
304 | // If `non_token_call_stacks` is not empty we wouldn't like to add a new standalone |
305 | // `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a |
306 | // rule) as soon as it doesn't give us any useful additional info. |
307 | non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token)); |
308 | } |
309 | self.call_stacks |
310 | .splice(start_index.., non_token_call_stacks); |
311 | |
312 | let children_number_over_threshold = |
313 | self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD; |
314 | if children_number_over_threshold { |
315 | self.call_stacks.truncate(start_index); |
316 | self.call_stacks |
317 | .push(RulesCallStack::new(ParseAttempt::Rule(rule))); |
318 | } else { |
319 | for call_stack in self.call_stacks.iter_mut().skip(start_index) { |
320 | if matches!(call_stack.deepest, ParseAttempt::Token) { |
321 | call_stack.deepest = ParseAttempt::Rule(rule); |
322 | } else { |
323 | call_stack.parent = Some(rule); |
324 | } |
325 | } |
326 | } |
327 | } |
328 | |
329 | /// If `expected` flag is set to false, it means we've successfully parsed token being in the |
330 | /// state of negative lookahead and want to track `token` in the `unexpected_tokens`. Otherwise, |
331 | /// we want to track it the `expected_tokens`. Let's call chosen vec a `target_vec`. |
332 | /// |
333 | /// In case `position` is: |
334 | /// * Equal to `max_position`, add `token` to `target_vec`, |
335 | /// * Bigger than `max_position`, set `token` as the only new element of `target_vec`. |
336 | #[allow (clippy::comparison_chain)] |
337 | fn try_add_new_token( |
338 | &mut self, |
339 | token: ParsingToken, |
340 | start_position: usize, |
341 | position: usize, |
342 | negative_lookahead: bool, |
343 | ) { |
344 | let target_vec_push_token = |attempts: &mut ParseAttempts<R>| { |
345 | let target_vec = if negative_lookahead { |
346 | &mut attempts.unexpected_tokens |
347 | } else { |
348 | &mut attempts.expected_tokens |
349 | }; |
350 | target_vec.push(token); |
351 | }; |
352 | |
353 | if position > self.max_position { |
354 | if negative_lookahead && start_position > self.max_position { |
355 | // We encountered a sequence under negative lookahead. |
356 | // We would like to track only first failed token in this sequence (which |
357 | // `start_position` should be equal to `self.max_position`). |
358 | return; |
359 | } |
360 | target_vec_push_token(self); |
361 | |
362 | if negative_lookahead { |
363 | // In case of successful parsing of token under negative lookahead the only |
364 | // thing we'd like to do is to track the token in the `unexpected_tokens`. |
365 | return; |
366 | } |
367 | self.max_position = position; |
368 | self.expected_tokens.clear(); |
369 | self.unexpected_tokens.clear(); |
370 | self.call_stacks.clear(); |
371 | self.call_stacks |
372 | .push(RulesCallStack::new(ParseAttempt::Token)); |
373 | } else if position == self.max_position { |
374 | target_vec_push_token(self); |
375 | self.call_stacks |
376 | .push(RulesCallStack::new(ParseAttempt::Token)); |
377 | } |
378 | } |
379 | |
380 | /// Reset state in case we've successfully parsed some token in |
381 | /// `match_string` or `match_insensetive`. |
382 | fn nullify_expected_tokens(&mut self, new_max_position: usize) { |
383 | self.call_stacks.clear(); |
384 | self.expected_tokens.clear(); |
385 | self.unexpected_tokens.clear(); |
386 | self.max_position = new_max_position; |
387 | } |
388 | } |
389 | |
390 | impl<R: RuleType> Default for ParseAttempts<R> { |
391 | fn default() -> Self { |
392 | Self::new() |
393 | } |
394 | } |
395 | |
396 | /// The complete state of a [`Parser`]. |
397 | /// |
398 | /// [`Parser`]: trait.Parser.html |
399 | #[derive (Debug)] |
400 | pub struct ParserState<'i, R: RuleType> { |
401 | /// Current position from which we try to apply some parser function. |
402 | /// Initially is 0. |
403 | /// E.g., we are parsing `create user 'Bobby'` query, we parsed "create" via `match_insensitive` |
404 | /// and switched our `position` from 0 to the length of "create". |
405 | /// |
406 | /// E.g., see `match_string` -> `self.position.match_string(string)` which updates `self.pos`. |
407 | position: Position<'i>, |
408 | /// Queue representing rules partially (`QueueableToken::Start`) and |
409 | /// totally (`QueueableToken::End`) parsed. When entering rule we put it in the queue in a state |
410 | /// of `Start` and after all its sublogic (subrules or strings) are parsed, we change it to |
411 | /// `End` state. |
412 | queue: Vec<QueueableToken<'i, R>>, |
413 | /// Status set in case specific lookahead logic is used in grammar. |
414 | /// See `Lookahead` for more information. |
415 | lookahead: Lookahead, |
416 | /// Rules that we HAVE expected, tried to parse, but failed. |
417 | pos_attempts: Vec<R>, |
418 | /// Rules that we have NOT expected, tried to parse, but failed. |
419 | neg_attempts: Vec<R>, |
420 | /// Max position in the query from which we've tried to parse some rule but failed. |
421 | attempt_pos: usize, |
422 | /// Current atomicity status. For more information see `Atomicity`. |
423 | atomicity: Atomicity, |
424 | /// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP |
425 | /// invocations). |
426 | stack: Stack<Span<'i>>, |
427 | /// Used for setting max parser calls limit. |
428 | call_tracker: CallLimitTracker, |
429 | /// Together with tracking of `pos_attempts` and `attempt_pos` |
430 | /// as a pair of (list of rules that we've tried to parse but failed, max parsed position) |
431 | /// we track those rules (which we've tried to parse at the same max pos) at this helper struct. |
432 | /// |
433 | /// Note, that we may try to parse several rules on different positions. We want to track only |
434 | /// those rules, which attempt position is bigger, because we consider that it's nearer to the |
435 | /// query that user really wanted to pass. |
436 | /// |
437 | /// E.g. we have a query `create user "Bobby"` and two root rules: |
438 | /// * CreateUser = { "create" ~ "user" ~ Name } |
439 | /// * CreateTable = { "create" ~ "table" ~ Name } |
440 | /// * Name = { SOME_DEFINITION } |
441 | /// |
442 | /// While parsing the query we'll update tracker position to the start of "Bobby", because we'd |
443 | /// successfully parse "create" + "user" (and not "table"). |
444 | parse_attempts: ParseAttempts<R>, |
445 | } |
446 | |
447 | /// Creates a `ParserState` from a `&str`, supplying it to a closure `f`. |
448 | /// |
449 | /// # Examples |
450 | /// |
451 | /// ``` |
452 | /// # use pest; |
453 | /// let input = "" ; |
454 | /// pest::state::<(), _>(input, |s| Ok(s)).unwrap(); |
455 | /// ``` |
456 | #[allow (clippy::perf)] |
457 | pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>> |
458 | where |
459 | F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>, |
460 | { |
461 | let state = ParserState::new(input); |
462 | |
463 | match f(state) { |
464 | Ok(state) => { |
465 | let len = state.queue.len(); |
466 | Ok(new(Rc::new(state.queue), input, None, 0, len)) |
467 | } |
468 | Err(mut state) => { |
469 | let variant = if state.reached_call_limit() { |
470 | ErrorVariant::CustomError { |
471 | message: "call limit reached" .to_owned(), |
472 | } |
473 | } else { |
474 | state.pos_attempts.sort(); |
475 | state.pos_attempts.dedup(); |
476 | state.neg_attempts.sort(); |
477 | state.neg_attempts.dedup(); |
478 | ErrorVariant::ParsingError { |
479 | positives: state.pos_attempts.clone(), |
480 | negatives: state.neg_attempts.clone(), |
481 | } |
482 | }; |
483 | |
484 | if state.parse_attempts.enabled { |
485 | Err(Error::new_from_pos_with_parsing_attempts( |
486 | variant, |
487 | Position::new_internal(input, state.attempt_pos), |
488 | state.parse_attempts.clone(), |
489 | )) |
490 | } else { |
491 | Err(Error::new_from_pos( |
492 | variant, |
493 | Position::new_internal(input, state.attempt_pos), |
494 | )) |
495 | } |
496 | } |
497 | } |
498 | } |
499 | |
500 | impl<'i, R: RuleType> ParserState<'i, R> { |
501 | /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box` |
502 | /// will be passed from closure to closure based on the needs of the specified `Parser`. |
503 | /// |
504 | /// # Examples |
505 | /// |
506 | /// ``` |
507 | /// # use pest; |
508 | /// let input = "" ; |
509 | /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input); |
510 | /// ``` |
511 | pub fn new(input: &'i str) -> Box<Self> { |
512 | Box::new(ParserState { |
513 | position: Position::from_start(input), |
514 | queue: vec![], |
515 | lookahead: Lookahead::None, |
516 | pos_attempts: vec![], |
517 | neg_attempts: vec![], |
518 | attempt_pos: 0, |
519 | atomicity: Atomicity::NonAtomic, |
520 | stack: Stack::new(), |
521 | call_tracker: Default::default(), |
522 | parse_attempts: ParseAttempts::new(), |
523 | }) |
524 | } |
525 | |
526 | /// Get all parse attempts after process of parsing is finished. |
527 | pub fn get_parse_attempts(&self) -> &ParseAttempts<R> { |
528 | &self.parse_attempts |
529 | } |
530 | |
531 | /// Returns a reference to the current `Position` of the `ParserState`. |
532 | /// |
533 | /// # Examples |
534 | /// |
535 | /// ``` |
536 | /// # use pest; |
537 | /// # #[allow (non_camel_case_types)] |
538 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
539 | /// enum Rule { |
540 | /// ab |
541 | /// } |
542 | /// |
543 | /// let input = "ab" ; |
544 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
545 | /// let position = state.position(); |
546 | /// assert_eq!(position.pos(), 0); |
547 | /// ``` |
548 | pub fn position(&self) -> &Position<'i> { |
549 | &self.position |
550 | } |
551 | |
552 | /// Returns the current atomicity of the `ParserState`. |
553 | /// |
554 | /// # Examples |
555 | /// |
556 | /// ``` |
557 | /// # use pest; |
558 | /// # use pest::Atomicity; |
559 | /// # #[allow (non_camel_case_types)] |
560 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
561 | /// enum Rule { |
562 | /// ab |
563 | /// } |
564 | /// |
565 | /// let input = "ab" ; |
566 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
567 | /// let atomicity = state.atomicity(); |
568 | /// assert_eq!(atomicity, Atomicity::NonAtomic); |
569 | /// ``` |
570 | pub fn atomicity(&self) -> Atomicity { |
571 | self.atomicity |
572 | } |
573 | |
574 | #[inline ] |
575 | fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> { |
576 | if self.call_tracker.limit_reached() { |
577 | return Err(self); |
578 | } |
579 | self.call_tracker.increment_depth(); |
580 | Ok(self) |
581 | } |
582 | |
583 | #[inline ] |
584 | fn reached_call_limit(&self) -> bool { |
585 | self.call_tracker.limit_reached() |
586 | } |
587 | |
588 | /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure |
589 | /// meant to match the rule. |
590 | /// |
591 | /// # Examples |
592 | /// |
593 | /// ``` |
594 | /// # use pest; |
595 | /// # #[allow (non_camel_case_types)] |
596 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
597 | /// enum Rule { |
598 | /// a |
599 | /// } |
600 | /// |
601 | /// let input = "a" ; |
602 | /// let pairs: Vec<_> = pest::state(input, |state| { |
603 | /// state.rule(Rule::a, |s| Ok(s)) |
604 | /// }).unwrap().collect(); |
605 | /// |
606 | /// assert_eq!(pairs.len(), 1); |
607 | /// ``` |
608 | #[inline ] |
609 | pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>> |
610 | where |
611 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
612 | { |
613 | self = self.inc_call_check_limit()?; |
614 | // Position from which this `rule` starts parsing. |
615 | let actual_pos = self.position.pos(); |
616 | // Remember index of the `self.queue` element that will be associated with this `rule`. |
617 | let index = self.queue.len(); |
618 | |
619 | let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos { |
620 | (self.pos_attempts.len(), self.neg_attempts.len()) |
621 | } else { |
622 | // Attempts have not been cleared yet since the attempt_pos is older. |
623 | (0, 0) |
624 | }; |
625 | |
626 | if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic { |
627 | // Pair's position will only be known after running the closure. |
628 | self.queue.push(QueueableToken::Start { |
629 | end_token_index: 0, |
630 | input_pos: actual_pos, |
631 | }); |
632 | } |
633 | |
634 | // Remember attempts number before `f` call. |
635 | // In `track` using this variable we can say, how many attempts were added |
636 | // during children rules traversal. |
637 | let attempts = self.attempts_at(actual_pos); |
638 | // Number of call stacks present in `self.parse_attempts` before `f` call. |
639 | // We need to remember this number only in case there wasn't found any farther attempt. |
640 | // E.g. we are handling rule, on start position of which may be tested two |
641 | // children rules. At the moment we'll return from `f` call below, |
642 | // there will be two more children rules in `self.parse_attempts` that we'll |
643 | // consider to be the children of current `rule`. |
644 | let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number(); |
645 | // Max parsing attempt position at the moment of `rule` handling. |
646 | // It case it's raised during children rules handling, it means |
647 | // we've made a parsing progress. |
648 | let remember_max_position = self.parse_attempts.max_position; |
649 | |
650 | let result = f(self); |
651 | |
652 | let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| { |
653 | if new_state.parse_attempts.max_position > remember_max_position { |
654 | // It means that one of `match_string` or e.g. `match_insensetive` function calls |
655 | // have already erased `self.parse_attempts.call_stacks` and that previously |
656 | // remembered values are not valid anymore. |
657 | remember_call_stacks_number = 0; |
658 | } |
659 | if !matches!(new_state.atomicity, Atomicity::Atomic) { |
660 | new_state |
661 | .parse_attempts |
662 | .try_add_new_stack_rule(rule, remember_call_stacks_number); |
663 | } |
664 | }; |
665 | |
666 | match result { |
667 | Ok(mut new_state) => { |
668 | if new_state.lookahead == Lookahead::Negative { |
669 | new_state.track( |
670 | rule, |
671 | actual_pos, |
672 | pos_attempts_index, |
673 | neg_attempts_index, |
674 | attempts, |
675 | ); |
676 | } |
677 | |
678 | if new_state.lookahead == Lookahead::None |
679 | && new_state.atomicity != Atomicity::Atomic |
680 | { |
681 | // Index of `QueueableToken::End` token added below |
682 | // that corresponds to previously added `QueueableToken::Start` token. |
683 | let new_index = new_state.queue.len(); |
684 | match new_state.queue[index] { |
685 | QueueableToken::Start { |
686 | ref mut end_token_index, |
687 | .. |
688 | } => *end_token_index = new_index, |
689 | _ => unreachable!(), |
690 | }; |
691 | |
692 | let new_pos = new_state.position.pos(); |
693 | |
694 | new_state.queue.push(QueueableToken::End { |
695 | start_token_index: index, |
696 | rule, |
697 | tag: None, |
698 | input_pos: new_pos, |
699 | }); |
700 | } |
701 | |
702 | // Note, that we need to count positive parsing results too, because we can fail in |
703 | // optional rule call inside which may lie the farthest |
704 | // parsed token. |
705 | if new_state.parse_attempts.enabled { |
706 | try_add_rule_to_stack(&mut new_state); |
707 | } |
708 | Ok(new_state) |
709 | } |
710 | Err(mut new_state) => { |
711 | if new_state.lookahead != Lookahead::Negative { |
712 | new_state.track( |
713 | rule, |
714 | actual_pos, |
715 | pos_attempts_index, |
716 | neg_attempts_index, |
717 | attempts, |
718 | ); |
719 | if new_state.parse_attempts.enabled { |
720 | try_add_rule_to_stack(&mut new_state); |
721 | } |
722 | } |
723 | |
724 | if new_state.lookahead == Lookahead::None |
725 | && new_state.atomicity != Atomicity::Atomic |
726 | { |
727 | new_state.queue.truncate(index); |
728 | } |
729 | |
730 | Err(new_state) |
731 | } |
732 | } |
733 | } |
734 | |
735 | /// Tag current node |
736 | /// |
737 | /// # Examples |
738 | /// |
739 | /// Try to recognize the one specified in a set of characters |
740 | /// |
741 | /// ``` |
742 | /// use pest::{state, ParseResult, ParserState, iterators::Pair}; |
743 | /// #[allow(non_camel_case_types)] |
744 | /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
745 | /// enum Rule { |
746 | /// character, |
747 | /// } |
748 | /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> { |
749 | /// state.sequence(|state| { |
750 | /// character(state) |
751 | /// .and_then(|state| character(state)) |
752 | /// .and_then(|state| character(state)) |
753 | /// .and_then(|state| state.tag_node("c" )) |
754 | /// .and_then(|state| character(state)) |
755 | /// }) |
756 | /// } |
757 | /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> { |
758 | /// state.rule(Rule::character, |state| state.match_range('a' ..'z' )) |
759 | /// } |
760 | /// |
761 | /// let input = "abcd" ; |
762 | /// let pairs = state(input, mark_c).unwrap(); |
763 | /// // find all node tag as `c` |
764 | /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c" )).collect(); |
765 | /// assert_eq!(find[0].as_str(), "c" ) |
766 | /// ``` |
767 | #[inline ] |
768 | pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> { |
769 | if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() { |
770 | *old = Some(tag) |
771 | } |
772 | Ok(self) |
773 | } |
774 | |
775 | /// Get number of allowed rules attempts + prohibited rules attempts. |
776 | fn attempts_at(&self, pos: usize) -> usize { |
777 | if self.attempt_pos == pos { |
778 | self.pos_attempts.len() + self.neg_attempts.len() |
779 | } else { |
780 | 0 |
781 | } |
782 | } |
783 | |
784 | fn track( |
785 | &mut self, |
786 | rule: R, |
787 | pos: usize, |
788 | pos_attempts_index: usize, |
789 | neg_attempts_index: usize, |
790 | prev_attempts: usize, |
791 | ) { |
792 | if self.atomicity == Atomicity::Atomic { |
793 | return; |
794 | } |
795 | |
796 | // If nested rules made no progress, there is no use to report them; it's only useful to |
797 | // track the current rule, the exception being when only one attempt has been made during |
798 | // the children rules. |
799 | let curr_attempts = self.attempts_at(pos); |
800 | if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 { |
801 | return; |
802 | } |
803 | |
804 | if pos == self.attempt_pos { |
805 | self.pos_attempts.truncate(pos_attempts_index); |
806 | self.neg_attempts.truncate(neg_attempts_index); |
807 | } |
808 | |
809 | if pos > self.attempt_pos { |
810 | self.pos_attempts.clear(); |
811 | self.neg_attempts.clear(); |
812 | self.attempt_pos = pos; |
813 | } |
814 | |
815 | let attempts = if self.lookahead != Lookahead::Negative { |
816 | &mut self.pos_attempts |
817 | } else { |
818 | &mut self.neg_attempts |
819 | }; |
820 | |
821 | if pos == self.attempt_pos { |
822 | attempts.push(rule); |
823 | } |
824 | } |
825 | |
826 | /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns |
827 | /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current |
828 | /// `Box<ParserState>` otherwise. |
829 | /// |
830 | /// This method is useful to parse sequences that only match together which usually come in the |
831 | /// form of chained `Result`s with |
832 | /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then). |
833 | /// |
834 | /// |
835 | /// # Examples |
836 | /// |
837 | /// ``` |
838 | /// # use pest; |
839 | /// # #[allow (non_camel_case_types)] |
840 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
841 | /// enum Rule { |
842 | /// a |
843 | /// } |
844 | /// |
845 | /// let input = "a" ; |
846 | /// let pairs: Vec<_> = pest::state(input, |state| { |
847 | /// state.sequence(|s| { |
848 | /// s.rule(Rule::a, |s| Ok(s)).and_then(|s| { |
849 | /// s.match_string("b" ) |
850 | /// }) |
851 | /// }).or_else(|s| { |
852 | /// Ok(s) |
853 | /// }) |
854 | /// }).unwrap().collect(); |
855 | /// |
856 | /// assert_eq!(pairs.len(), 0); |
857 | /// ``` |
858 | #[inline ] |
859 | pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
860 | where |
861 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
862 | { |
863 | self = self.inc_call_check_limit()?; |
864 | let token_index = self.queue.len(); |
865 | let initial_pos = self.position; |
866 | |
867 | let result = f(self); |
868 | |
869 | match result { |
870 | Ok(new_state) => Ok(new_state), |
871 | Err(mut new_state) => { |
872 | // Restore the initial position and truncate the token queue. |
873 | new_state.position = initial_pos; |
874 | new_state.queue.truncate(token_index); |
875 | Err(new_state) |
876 | } |
877 | } |
878 | } |
879 | |
880 | /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns |
881 | /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`. |
882 | /// |
883 | /// # Examples |
884 | /// |
885 | /// ``` |
886 | /// # use pest; |
887 | /// # #[allow (non_camel_case_types)] |
888 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
889 | /// enum Rule { |
890 | /// ab |
891 | /// } |
892 | /// |
893 | /// let input = "aab" ; |
894 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
895 | /// let mut result = state.repeat(|s| { |
896 | /// s.match_string("a" ) |
897 | /// }); |
898 | /// assert!(result.is_ok()); |
899 | /// assert_eq!(result.unwrap().position().pos(), 2); |
900 | /// |
901 | /// state = pest::ParserState::new(input); |
902 | /// result = state.repeat(|s| { |
903 | /// s.match_string("b" ) |
904 | /// }); |
905 | /// assert!(result.is_ok()); |
906 | /// assert_eq!(result.unwrap().position().pos(), 0); |
907 | /// ``` |
908 | #[inline ] |
909 | pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>> |
910 | where |
911 | F: FnMut(Box<Self>) -> ParseResult<Box<Self>>, |
912 | { |
913 | self = self.inc_call_check_limit()?; |
914 | let mut result = f(self); |
915 | |
916 | loop { |
917 | match result { |
918 | Ok(state) => result = f(state), |
919 | Err(state) => return Ok(state), |
920 | }; |
921 | } |
922 | } |
923 | |
924 | /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns |
925 | /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`. |
926 | /// |
927 | /// # Examples |
928 | /// |
929 | /// ``` |
930 | /// # use pest; |
931 | /// # #[allow (non_camel_case_types)] |
932 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
933 | /// enum Rule { |
934 | /// ab |
935 | /// } |
936 | /// |
937 | /// let input = "ab" ; |
938 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
939 | /// let result = state.optional(|s| { |
940 | /// s.match_string("ab" ) |
941 | /// }); |
942 | /// assert!(result.is_ok()); |
943 | /// |
944 | /// state = pest::ParserState::new(input); |
945 | /// let result = state.optional(|s| { |
946 | /// s.match_string("ac" ) |
947 | /// }); |
948 | /// assert!(result.is_ok()); |
949 | /// ``` |
950 | #[inline ] |
951 | pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
952 | where |
953 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
954 | { |
955 | self = self.inc_call_check_limit()?; |
956 | match f(self) { |
957 | Ok(state) | Err(state) => Ok(state), |
958 | } |
959 | } |
960 | |
961 | /// Generic function to handle result of char/string/range parsing |
962 | /// in order to track (un)expected tokens. |
963 | fn handle_token_parse_result( |
964 | &mut self, |
965 | start_position: usize, |
966 | token: ParsingToken, |
967 | parse_succeeded: bool, |
968 | ) { |
969 | // New position after tracked parsed element for case of `parse_succeded` is true. |
970 | // Position of parsing failure otherwise. |
971 | let current_pos = self.position.pos(); |
972 | |
973 | if parse_succeeded { |
974 | if self.lookahead == Lookahead::Negative { |
975 | self.parse_attempts |
976 | .try_add_new_token(token, start_position, current_pos, true); |
977 | } else if current_pos > self.parse_attempts.max_position { |
978 | self.parse_attempts.nullify_expected_tokens(current_pos); |
979 | } |
980 | } else if self.lookahead != Lookahead::Negative { |
981 | self.parse_attempts |
982 | .try_add_new_token(token, start_position, current_pos, false); |
983 | } |
984 | } |
985 | |
986 | /// Attempts to match a single character based on a filter function. Returns `Ok` with the |
987 | /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` |
988 | /// otherwise. |
989 | /// |
990 | /// # Examples |
991 | /// |
992 | /// ``` |
993 | /// # use pest; |
994 | /// # #[allow (non_camel_case_types)] |
995 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
996 | /// enum Rule {} |
997 | /// |
998 | /// let input = "ab" ; |
999 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1000 | /// let result = state.match_char_by(|c| c.is_ascii()); |
1001 | /// assert!(result.is_ok()); |
1002 | /// assert_eq!(result.unwrap().position().pos(), 1); |
1003 | /// |
1004 | /// let input = "❤" ; |
1005 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1006 | /// let result = state.match_char_by(|c| c.is_ascii()); |
1007 | /// assert!(result.is_err()); |
1008 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
1009 | /// ``` |
1010 | #[inline ] |
1011 | pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
1012 | where |
1013 | F: FnOnce(char) -> bool, |
1014 | { |
1015 | let start_position = self.position.pos(); |
1016 | let succeeded = self.position.match_char_by(f); |
1017 | if self.parse_attempts.enabled { |
1018 | let token = ParsingToken::BuiltInRule; |
1019 | self.handle_token_parse_result(start_position, token, succeeded); |
1020 | } |
1021 | if succeeded { |
1022 | Ok(self) |
1023 | } else { |
1024 | Err(self) |
1025 | } |
1026 | } |
1027 | |
1028 | /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if |
1029 | /// successful, or `Err` with the updated `Box<ParserState>` otherwise. |
1030 | /// |
1031 | /// # Examples |
1032 | /// |
1033 | /// ``` |
1034 | /// # use pest; |
1035 | /// # #[allow (non_camel_case_types)] |
1036 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1037 | /// enum Rule {} |
1038 | /// |
1039 | /// let input = "ab" ; |
1040 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1041 | /// let mut result = state.match_string("ab" ); |
1042 | /// assert!(result.is_ok()); |
1043 | /// assert_eq!(result.unwrap().position().pos(), 2); |
1044 | /// |
1045 | /// state = pest::ParserState::new(input); |
1046 | /// result = state.match_string("ac" ); |
1047 | /// assert!(result.is_err()); |
1048 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
1049 | /// ``` |
1050 | #[inline ] |
1051 | pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> { |
1052 | let start_position = self.position.pos(); |
1053 | let succeeded = self.position.match_string(string); |
1054 | if self.parse_attempts.enabled { |
1055 | let token = ParsingToken::Sensitive { |
1056 | token: String::from(string), |
1057 | }; |
1058 | self.handle_token_parse_result(start_position, token, succeeded); |
1059 | } |
1060 | if succeeded { |
1061 | Ok(self) |
1062 | } else { |
1063 | Err(self) |
1064 | } |
1065 | } |
1066 | |
1067 | /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated |
1068 | /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise. |
1069 | /// |
1070 | /// # Examples |
1071 | /// |
1072 | /// ``` |
1073 | /// # use pest; |
1074 | /// # #[allow (non_camel_case_types)] |
1075 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1076 | /// enum Rule {} |
1077 | /// |
1078 | /// let input = "ab" ; |
1079 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1080 | /// let mut result = state.match_insensitive("AB" ); |
1081 | /// assert!(result.is_ok()); |
1082 | /// assert_eq!(result.unwrap().position().pos(), 2); |
1083 | /// |
1084 | /// state = pest::ParserState::new(input); |
1085 | /// result = state.match_insensitive("AC" ); |
1086 | /// assert!(result.is_err()); |
1087 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
1088 | /// ``` |
1089 | #[inline ] |
1090 | pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> { |
1091 | let start_position: usize = self.position().pos(); |
1092 | let succeeded = self.position.match_insensitive(string); |
1093 | if self.parse_attempts.enabled { |
1094 | let token = ParsingToken::Insensitive { |
1095 | token: String::from(string), |
1096 | }; |
1097 | self.handle_token_parse_result(start_position, token, succeeded); |
1098 | } |
1099 | if succeeded { |
1100 | Ok(self) |
1101 | } else { |
1102 | Err(self) |
1103 | } |
1104 | } |
1105 | |
1106 | /// Attempts to match a single character from the given range. Returns `Ok` with the updated |
1107 | /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise. |
1108 | /// |
1109 | /// # Caution |
1110 | /// The provided `range` is interpreted as inclusive. |
1111 | /// |
1112 | /// # Examples |
1113 | /// |
1114 | /// ``` |
1115 | /// # use pest; |
1116 | /// # #[allow (non_camel_case_types)] |
1117 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1118 | /// enum Rule {} |
1119 | /// |
1120 | /// let input = "ab" ; |
1121 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1122 | /// let mut result = state.match_range('a' ..'z' ); |
1123 | /// assert!(result.is_ok()); |
1124 | /// assert_eq!(result.unwrap().position().pos(), 1); |
1125 | /// |
1126 | /// state = pest::ParserState::new(input); |
1127 | /// result = state.match_range('A' ..'Z' ); |
1128 | /// assert!(result.is_err()); |
1129 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
1130 | /// ``` |
1131 | #[inline ] |
1132 | pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> { |
1133 | let start_position = self.position().pos(); |
1134 | let token = ParsingToken::Range { |
1135 | start: range.start, |
1136 | end: range.end, |
1137 | }; |
1138 | let succeeded = self.position.match_range(range); |
1139 | if self.parse_attempts.enabled { |
1140 | self.handle_token_parse_result(start_position, token, succeeded); |
1141 | } |
1142 | if succeeded { |
1143 | Ok(self) |
1144 | } else { |
1145 | Err(self) |
1146 | } |
1147 | } |
1148 | |
1149 | /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>` |
1150 | /// if successful, or `Err` with the updated `Box<ParserState>` otherwise. |
1151 | /// |
1152 | /// # Examples |
1153 | /// |
1154 | /// ``` |
1155 | /// # use pest; |
1156 | /// # #[allow (non_camel_case_types)] |
1157 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1158 | /// enum Rule {} |
1159 | /// |
1160 | /// let input = "ab" ; |
1161 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1162 | /// let mut result = state.skip(1); |
1163 | /// assert!(result.is_ok()); |
1164 | /// assert_eq!(result.unwrap().position().pos(), 1); |
1165 | /// |
1166 | /// state = pest::ParserState::new(input); |
1167 | /// result = state.skip(3); |
1168 | /// assert!(result.is_err()); |
1169 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
1170 | /// ``` |
1171 | #[inline ] |
1172 | pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> { |
1173 | if self.position.skip(n) { |
1174 | Ok(self) |
1175 | } else { |
1176 | Err(self) |
1177 | } |
1178 | } |
1179 | |
1180 | /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the |
1181 | /// updated `Box<ParserState>` whether or not one of the strings is found. |
1182 | /// |
1183 | /// # Examples |
1184 | /// |
1185 | /// ``` |
1186 | /// # use pest; |
1187 | /// # #[allow (non_camel_case_types)] |
1188 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1189 | /// enum Rule {} |
1190 | /// |
1191 | /// let input = "abcd" ; |
1192 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1193 | /// let mut result = state.skip_until(&["c" , "d" ]); |
1194 | /// assert!(result.is_ok()); |
1195 | /// assert_eq!(result.unwrap().position().pos(), 2); |
1196 | /// ``` |
1197 | #[inline ] |
1198 | pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> { |
1199 | self.position.skip_until(strings); |
1200 | Ok(self) |
1201 | } |
1202 | |
1203 | /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>` |
1204 | /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise. |
1205 | /// |
1206 | /// # Examples |
1207 | /// |
1208 | /// ``` |
1209 | /// # use pest; |
1210 | /// # #[allow (non_camel_case_types)] |
1211 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1212 | /// enum Rule {} |
1213 | /// |
1214 | /// let input = "ab" ; |
1215 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1216 | /// let mut result = state.start_of_input(); |
1217 | /// assert!(result.is_ok()); |
1218 | /// |
1219 | /// state = pest::ParserState::new(input); |
1220 | /// state = state.match_string("ab" ).unwrap(); |
1221 | /// result = state.start_of_input(); |
1222 | /// assert!(result.is_err()); |
1223 | /// ``` |
1224 | #[inline ] |
1225 | pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> { |
1226 | if self.position.at_start() { |
1227 | Ok(self) |
1228 | } else { |
1229 | Err(self) |
1230 | } |
1231 | } |
1232 | |
1233 | /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if |
1234 | /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise. |
1235 | /// |
1236 | /// # Examples |
1237 | /// |
1238 | /// ``` |
1239 | /// # use pest; |
1240 | /// # #[allow (non_camel_case_types)] |
1241 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1242 | /// enum Rule {} |
1243 | /// |
1244 | /// let input = "ab" ; |
1245 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1246 | /// let mut result = state.end_of_input(); |
1247 | /// assert!(result.is_err()); |
1248 | /// |
1249 | /// state = pest::ParserState::new(input); |
1250 | /// state = state.match_string("ab" ).unwrap(); |
1251 | /// result = state.end_of_input(); |
1252 | /// assert!(result.is_ok()); |
1253 | /// ``` |
1254 | #[inline ] |
1255 | pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> { |
1256 | if self.position.at_end() { |
1257 | Ok(self) |
1258 | } else { |
1259 | Err(self) |
1260 | } |
1261 | } |
1262 | |
1263 | /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns |
1264 | /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current |
1265 | /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err` |
1266 | /// together, negating the `Result`. |
1267 | /// |
1268 | /// # Examples |
1269 | /// |
1270 | /// ``` |
1271 | /// # use pest; |
1272 | /// # #[allow (non_camel_case_types)] |
1273 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1274 | /// enum Rule { |
1275 | /// a |
1276 | /// } |
1277 | /// |
1278 | /// let input = "a" ; |
1279 | /// let pairs: Vec<_> = pest::state(input, |state| { |
1280 | /// state.lookahead(true, |state| { |
1281 | /// state.rule(Rule::a, |s| Ok(s)) |
1282 | /// }) |
1283 | /// }).unwrap().collect(); |
1284 | /// |
1285 | /// assert_eq!(pairs.len(), 0); |
1286 | /// ``` |
1287 | #[inline ] |
1288 | pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>> |
1289 | where |
1290 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
1291 | { |
1292 | self = self.inc_call_check_limit()?; |
1293 | let initial_lookahead = self.lookahead; |
1294 | |
1295 | self.lookahead = if is_positive { |
1296 | match initial_lookahead { |
1297 | Lookahead::None | Lookahead::Positive => Lookahead::Positive, |
1298 | Lookahead::Negative => Lookahead::Negative, |
1299 | } |
1300 | } else { |
1301 | match initial_lookahead { |
1302 | Lookahead::None | Lookahead::Positive => Lookahead::Negative, |
1303 | Lookahead::Negative => Lookahead::Positive, |
1304 | } |
1305 | }; |
1306 | |
1307 | let initial_pos = self.position; |
1308 | |
1309 | let result = f(self.checkpoint()); |
1310 | |
1311 | let result_state = match result { |
1312 | Ok(mut new_state) => { |
1313 | new_state.position = initial_pos; |
1314 | new_state.lookahead = initial_lookahead; |
1315 | Ok(new_state.restore()) |
1316 | } |
1317 | Err(mut new_state) => { |
1318 | new_state.position = initial_pos; |
1319 | new_state.lookahead = initial_lookahead; |
1320 | Err(new_state.restore()) |
1321 | } |
1322 | }; |
1323 | |
1324 | if is_positive { |
1325 | result_state |
1326 | } else { |
1327 | match result_state { |
1328 | Ok(state) => Err(state), |
1329 | Err(state) => Ok(state), |
1330 | } |
1331 | } |
1332 | } |
1333 | |
1334 | /// Transformation which stops `Token`s from being generated according to `is_atomic`. |
1335 | /// Used as wrapper over `rule` (or even another `atomic`) call. |
1336 | /// |
1337 | /// # Examples |
1338 | /// |
1339 | /// ``` |
1340 | /// # use pest::{self, Atomicity}; |
1341 | /// # #[allow (non_camel_case_types)] |
1342 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1343 | /// enum Rule { |
1344 | /// a |
1345 | /// } |
1346 | /// |
1347 | /// let input = "a" ; |
1348 | /// let pairs: Vec<_> = pest::state(input, |state| { |
1349 | /// state.atomic(Atomicity::Atomic, |s| { |
1350 | /// s.rule(Rule::a, |s| Ok(s)) |
1351 | /// }) |
1352 | /// }).unwrap().collect(); |
1353 | /// |
1354 | /// assert_eq!(pairs.len(), 0); |
1355 | /// ``` |
1356 | #[inline ] |
1357 | pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>> |
1358 | where |
1359 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
1360 | { |
1361 | self = self.inc_call_check_limit()?; |
1362 | // In case child parsing call is another `atomic` it will have its own atomicity status. |
1363 | let initial_atomicity = self.atomicity; |
1364 | // In case child atomicity is the same as we've demanded, we shouldn't do nothing. |
1365 | // E.g. we have the following rules: |
1366 | // * RootRule = @{ InnerRule } |
1367 | // * InnerRule = @{ ... } |
1368 | let should_toggle = self.atomicity != atomicity; |
1369 | |
1370 | // Note that we take atomicity of the top rule and not of the leaf (inner). |
1371 | if should_toggle { |
1372 | self.atomicity = atomicity; |
1373 | } |
1374 | |
1375 | let result = f(self); |
1376 | |
1377 | match result { |
1378 | Ok(mut new_state) => { |
1379 | if should_toggle { |
1380 | new_state.atomicity = initial_atomicity; |
1381 | } |
1382 | Ok(new_state) |
1383 | } |
1384 | Err(mut new_state) => { |
1385 | if should_toggle { |
1386 | new_state.atomicity = initial_atomicity; |
1387 | } |
1388 | Err(new_state) |
1389 | } |
1390 | } |
1391 | } |
1392 | |
1393 | /// Evaluates the result of closure `f` and pushes the span of the input consumed from before |
1394 | /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is |
1395 | /// called successfully, or `Err(Box<ParserState>)` otherwise. |
1396 | /// |
1397 | /// # Examples |
1398 | /// |
1399 | /// ``` |
1400 | /// # use pest; |
1401 | /// # #[allow (non_camel_case_types)] |
1402 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1403 | /// enum Rule {} |
1404 | /// |
1405 | /// let input = "ab" ; |
1406 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1407 | /// let mut result = state.stack_push(|state| state.match_string("a" )); |
1408 | /// assert!(result.is_ok()); |
1409 | /// assert_eq!(result.unwrap().position().pos(), 1); |
1410 | /// ``` |
1411 | #[inline ] |
1412 | pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
1413 | where |
1414 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
1415 | { |
1416 | self = self.inc_call_check_limit()?; |
1417 | let start = self.position; |
1418 | |
1419 | let result = f(self); |
1420 | |
1421 | match result { |
1422 | Ok(mut state) => { |
1423 | let end = state.position; |
1424 | state.stack.push(start.span(&end)); |
1425 | Ok(state) |
1426 | } |
1427 | Err(state) => Err(state), |
1428 | } |
1429 | } |
1430 | |
1431 | /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)` |
1432 | /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise. |
1433 | /// |
1434 | /// # Examples |
1435 | /// |
1436 | /// ``` |
1437 | /// # use pest; |
1438 | /// # #[allow (non_camel_case_types)] |
1439 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1440 | /// enum Rule {} |
1441 | /// |
1442 | /// let input = "aa" ; |
1443 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1444 | /// let mut result = state.stack_push(|state| state.match_string("a" )).and_then( |
1445 | /// |state| state.stack_peek() |
1446 | /// ); |
1447 | /// assert!(result.is_ok()); |
1448 | /// assert_eq!(result.unwrap().position().pos(), 2); |
1449 | /// ``` |
1450 | #[inline ] |
1451 | pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> { |
1452 | let string = self |
1453 | .stack |
1454 | .peek() |
1455 | .expect("peek was called on empty stack" ) |
1456 | .as_str(); |
1457 | self.match_string(string) |
1458 | } |
1459 | |
1460 | /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)` |
1461 | /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise. |
1462 | /// |
1463 | /// # Examples |
1464 | /// |
1465 | /// ``` |
1466 | /// # use pest; |
1467 | /// # #[allow (non_camel_case_types)] |
1468 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1469 | /// enum Rule {} |
1470 | /// |
1471 | /// let input = "aa" ; |
1472 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1473 | /// let mut result = state.stack_push(|state| state.match_string("a" )).and_then( |
1474 | /// |state| state.stack_pop() |
1475 | /// ); |
1476 | /// assert!(result.is_ok()); |
1477 | /// assert_eq!(result.unwrap().position().pos(), 2); |
1478 | /// ``` |
1479 | #[inline ] |
1480 | pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> { |
1481 | let string = self |
1482 | .stack |
1483 | .pop() |
1484 | .expect("pop was called on empty stack" ) |
1485 | .as_str(); |
1486 | self.match_string(string) |
1487 | } |
1488 | |
1489 | /// Matches part of the state of the stack. |
1490 | /// |
1491 | /// # Examples |
1492 | /// |
1493 | /// ``` |
1494 | /// # use pest::{self, MatchDir}; |
1495 | /// # #[allow (non_camel_case_types)] |
1496 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1497 | /// enum Rule {} |
1498 | /// |
1499 | /// let input = "abcd cd cb" ; |
1500 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1501 | /// let mut result = state |
1502 | /// .stack_push(|state| state.match_string("a" )) |
1503 | /// .and_then(|state| state.stack_push(|state| state.match_string("b" ))) |
1504 | /// .and_then(|state| state.stack_push(|state| state.match_string("c" ))) |
1505 | /// .and_then(|state| state.stack_push(|state| state.match_string("d" ))) |
1506 | /// .and_then(|state| state.match_string(" " )) |
1507 | /// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop)) |
1508 | /// .and_then(|state| state.match_string(" " )) |
1509 | /// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom)); |
1510 | /// assert!(result.is_ok()); |
1511 | /// assert_eq!(result.unwrap().position().pos(), 10); |
1512 | /// ``` |
1513 | #[inline ] |
1514 | pub fn stack_match_peek_slice( |
1515 | mut self: Box<Self>, |
1516 | start: i32, |
1517 | end: Option<i32>, |
1518 | match_dir: MatchDir, |
1519 | ) -> ParseResult<Box<Self>> { |
1520 | let range = match constrain_idxs(start, end, self.stack.len()) { |
1521 | Some(r) => r, |
1522 | None => return Err(self), |
1523 | }; |
1524 | // return true if an empty sequence is requested |
1525 | if range.end <= range.start { |
1526 | return Ok(self); |
1527 | } |
1528 | |
1529 | let mut position = self.position; |
1530 | let result = { |
1531 | let mut iter_b2t = self.stack[range].iter(); |
1532 | let matcher = |span: &Span<'_>| position.match_string(span.as_str()); |
1533 | match match_dir { |
1534 | MatchDir::BottomToTop => iter_b2t.all(matcher), |
1535 | MatchDir::TopToBottom => iter_b2t.rev().all(matcher), |
1536 | } |
1537 | }; |
1538 | if result { |
1539 | self.position = position; |
1540 | Ok(self) |
1541 | } else { |
1542 | Err(self) |
1543 | } |
1544 | } |
1545 | |
1546 | /// Matches the full state of the stack. |
1547 | /// |
1548 | /// # Examples |
1549 | /// |
1550 | /// ``` |
1551 | /// # use pest; |
1552 | /// # #[allow (non_camel_case_types)] |
1553 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1554 | /// enum Rule {} |
1555 | /// |
1556 | /// let input = "abba" ; |
1557 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1558 | /// let mut result = state |
1559 | /// .stack_push(|state| state.match_string("a" )) |
1560 | /// .and_then(|state| { state.stack_push(|state| state.match_string("b" )) }) |
1561 | /// .and_then(|state| state.stack_match_peek()); |
1562 | /// assert!(result.is_ok()); |
1563 | /// assert_eq!(result.unwrap().position().pos(), 4); |
1564 | /// ``` |
1565 | #[inline ] |
1566 | pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> { |
1567 | self.stack_match_peek_slice(0, None, MatchDir::TopToBottom) |
1568 | } |
1569 | |
1570 | /// Matches the full state of the stack. This method will clear the stack as it evaluates. |
1571 | /// |
1572 | /// # Examples |
1573 | /// |
1574 | /// ``` |
1575 | /// /// # use pest; |
1576 | /// # #[allow (non_camel_case_types)] |
1577 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1578 | /// enum Rule {} |
1579 | /// |
1580 | /// let input = "aaaa" ; |
1581 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1582 | /// let mut result = state.stack_push(|state| state.match_string("a" )).and_then(|state| { |
1583 | /// state.stack_push(|state| state.match_string("a" )) |
1584 | /// }).and_then(|state| state.stack_match_peek()); |
1585 | /// assert!(result.is_ok()); |
1586 | /// assert_eq!(result.unwrap().position().pos(), 4); |
1587 | /// ``` |
1588 | #[inline ] |
1589 | pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> { |
1590 | let mut position = self.position; |
1591 | let mut result = true; |
1592 | while let Some(span) = self.stack.pop() { |
1593 | result = position.match_string(span.as_str()); |
1594 | if !result { |
1595 | break; |
1596 | } |
1597 | } |
1598 | |
1599 | if result { |
1600 | self.position = position; |
1601 | Ok(self) |
1602 | } else { |
1603 | Err(self) |
1604 | } |
1605 | } |
1606 | |
1607 | /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or |
1608 | /// `Err(Box<ParserState>)` otherwise. |
1609 | /// |
1610 | /// # Examples |
1611 | /// |
1612 | /// ``` |
1613 | /// # use pest; |
1614 | /// # #[allow (non_camel_case_types)] |
1615 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1616 | /// enum Rule {} |
1617 | /// |
1618 | /// let input = "aa" ; |
1619 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1620 | /// let mut result = state.stack_push(|state| state.match_string("a" )).and_then( |
1621 | /// |state| state.stack_drop() |
1622 | /// ); |
1623 | /// assert!(result.is_ok()); |
1624 | /// assert_eq!(result.unwrap().position().pos(), 1); |
1625 | /// ``` |
1626 | #[inline ] |
1627 | pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> { |
1628 | match self.stack.pop() { |
1629 | Some(_) => Ok(self), |
1630 | None => Err(self), |
1631 | } |
1632 | } |
1633 | |
1634 | /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently, |
1635 | /// this method only restores the stack. |
1636 | /// |
1637 | /// # Examples |
1638 | /// |
1639 | /// ``` |
1640 | /// # use pest; |
1641 | /// # #[allow (non_camel_case_types)] |
1642 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
1643 | /// enum Rule {} |
1644 | /// |
1645 | /// let input = "ab" ; |
1646 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
1647 | /// let mut result = state.restore_on_err(|state| state.stack_push(|state| |
1648 | /// state.match_string("a" )).and_then(|state| state.match_string("a" )) |
1649 | /// ); |
1650 | /// |
1651 | /// assert!(result.is_err()); |
1652 | /// |
1653 | /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed. |
1654 | /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop()); |
1655 | /// assert!(catch_panic.is_err()); |
1656 | /// ``` |
1657 | #[inline ] |
1658 | pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
1659 | where |
1660 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
1661 | { |
1662 | match f(self.checkpoint()) { |
1663 | Ok(state) => Ok(state.checkpoint_ok()), |
1664 | Err(state) => Err(state.restore()), |
1665 | } |
1666 | } |
1667 | |
1668 | // Mark the current state as a checkpoint and return the `Box`. |
1669 | #[inline ] |
1670 | pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> { |
1671 | self.stack.snapshot(); |
1672 | self |
1673 | } |
1674 | |
1675 | // The checkpoint was cleared successfully |
1676 | // so remove it without touching other stack state. |
1677 | #[inline ] |
1678 | pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> { |
1679 | self.stack.clear_snapshot(); |
1680 | self |
1681 | } |
1682 | |
1683 | // Restore the current state to the most recent checkpoint. |
1684 | #[inline ] |
1685 | pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> { |
1686 | self.stack.restore(); |
1687 | self |
1688 | } |
1689 | } |
1690 | |
1691 | /// Helper function used only in case stack operations (PUSH/POP) are used in grammar. |
1692 | fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> { |
1693 | let start_norm: usize = normalize_index(i:start, len)?; |
1694 | let end_norm: usize = end.map_or(default:Some(len), |e: i32| normalize_index(i:e, len))?; |
1695 | Some(start_norm..end_norm) |
1696 | } |
1697 | |
1698 | /// `constrain_idxs` helper function. |
1699 | /// Normalizes the index using its sequence’s length. |
1700 | /// Returns `None` if the normalized index is OOB. |
1701 | fn normalize_index(i: i32, len: usize) -> Option<usize> { |
1702 | if i > len as i32 { |
1703 | None |
1704 | } else if i >= 0 { |
1705 | Some(i as usize) |
1706 | } else { |
1707 | let real_i: i32 = len as i32 + i; |
1708 | if real_i >= 0 { |
1709 | Some(real_i as usize) |
1710 | } else { |
1711 | None |
1712 | } |
1713 | } |
1714 | } |
1715 | |
1716 | #[cfg (test)] |
1717 | mod test { |
1718 | use super::*; |
1719 | |
1720 | #[test ] |
1721 | fn normalize_index_pos() { |
1722 | assert_eq!(normalize_index(4, 6), Some(4)); |
1723 | assert_eq!(normalize_index(5, 5), Some(5)); |
1724 | assert_eq!(normalize_index(6, 3), None); |
1725 | } |
1726 | |
1727 | #[test ] |
1728 | fn normalize_index_neg() { |
1729 | assert_eq!(normalize_index(-4, 6), Some(2)); |
1730 | assert_eq!(normalize_index(-5, 5), Some(0)); |
1731 | assert_eq!(normalize_index(-6, 3), None); |
1732 | } |
1733 | } |
1734 | |