| 1 | // pest. The Elegant Parser |
| 2 | // Copyright (c) 2018 Dragoș Tiselice |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 |
| 5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
| 6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 7 | // option. All files in the project carrying such notice may not be copied, |
| 8 | // modified, or distributed except according to those terms. |
| 9 | |
| 10 | //! The core functionality of parsing grammar. |
| 11 | //! State of parser during the process of rules handling. |
| 12 | |
| 13 | use alloc::borrow::ToOwned; |
| 14 | use alloc::boxed::Box; |
| 15 | use alloc::collections::BTreeSet; |
| 16 | use alloc::rc::Rc; |
| 17 | use alloc::string::String; |
| 18 | use alloc::vec; |
| 19 | use alloc::vec::Vec; |
| 20 | use core::fmt::{Debug, Display, Formatter}; |
| 21 | use core::num::NonZeroUsize; |
| 22 | use core::ops::Range; |
| 23 | use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; |
| 24 | |
| 25 | use crate::error::{Error, ErrorVariant}; |
| 26 | use crate::iterators::pairs::new; |
| 27 | use crate::iterators::{pairs, QueueableToken}; |
| 28 | use crate::position::Position; |
| 29 | use crate::span::Span; |
| 30 | use crate::stack::Stack; |
| 31 | use crate::RuleType; |
| 32 | |
| 33 | /// The current lookahead status of a [`ParserState`]. |
| 34 | /// |
| 35 | /// [`ParserState`]: struct.ParserState.html |
| 36 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
| 37 | pub enum Lookahead { |
| 38 | /// The positive predicate, written as an ampersand &, |
| 39 | /// attempts to match its inner expression. |
| 40 | /// If the inner expression succeeds, parsing continues, |
| 41 | /// but at the same position as the predicate — |
| 42 | /// &foo ~ bar is thus a kind of "AND" statement: |
| 43 | /// "the input string must match foo AND bar". |
| 44 | /// If the inner expression fails, |
| 45 | /// the whole expression fails too. |
| 46 | Positive, |
| 47 | /// The negative predicate, written as an exclamation mark !, |
| 48 | /// attempts to match its inner expression. |
| 49 | /// If the inner expression fails, the predicate succeeds |
| 50 | /// and parsing continues at the same position as the predicate. |
| 51 | /// If the inner expression succeeds, the predicate fails — |
| 52 | /// !foo ~ bar is thus a kind of "NOT" statement: |
| 53 | /// "the input string must match bar but NOT foo". |
| 54 | Negative, |
| 55 | /// No lookahead (i.e. it will consume input). |
| 56 | None, |
| 57 | } |
| 58 | |
| 59 | /// The current atomicity of a [`ParserState`]. |
| 60 | /// |
| 61 | /// [`ParserState`]: struct.ParserState.html |
| 62 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
| 63 | pub enum Atomicity { |
| 64 | /// prevents implicit whitespace: inside an atomic rule, |
| 65 | /// the tilde ~ means "immediately followed by", |
| 66 | /// and repetition operators (asterisk * and plus sign +) |
| 67 | /// have no implicit separation. In addition, all other rules |
| 68 | /// called from an atomic rule are also treated as atomic. |
| 69 | /// (interior matching rules are silent) |
| 70 | Atomic, |
| 71 | /// The same as atomic, but inner tokens are produced as normal. |
| 72 | CompoundAtomic, |
| 73 | /// implicit whitespace is enabled |
| 74 | NonAtomic, |
| 75 | } |
| 76 | |
| 77 | /// Type alias to simplify specifying the return value of chained closures. |
| 78 | pub type ParseResult<S> = Result<S, S>; |
| 79 | |
| 80 | /// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`. |
| 81 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
| 82 | pub enum MatchDir { |
| 83 | /// from the bottom to the top of the stack |
| 84 | BottomToTop, |
| 85 | /// from the top to the bottom of the stack |
| 86 | TopToBottom, |
| 87 | } |
| 88 | |
| 89 | static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0); |
| 90 | |
| 91 | /// Sets the maximum call limit for the parser state |
| 92 | /// to prevent stack overflows or excessive execution times |
| 93 | /// in some grammars. |
| 94 | /// If set, the calls are tracked as a running total |
| 95 | /// over all non-terminal rules that can nest closures |
| 96 | /// (which are passed to transform the parser state). |
| 97 | /// |
| 98 | /// # Arguments |
| 99 | /// |
| 100 | /// * `limit` - The maximum number of calls. If None, |
| 101 | /// the number of calls is unlimited. |
| 102 | pub fn set_call_limit(limit: Option<NonZeroUsize>) { |
| 103 | CALL_LIMIT.store(val:limit.map(|f| f.get()).unwrap_or(0), order:Ordering::Relaxed); |
| 104 | } |
| 105 | |
| 106 | static ERROR_DETAIL: AtomicBool = AtomicBool::new(false); |
| 107 | |
| 108 | /// Sets whether information for more error details |
| 109 | /// should be collected. This is useful for debugging |
| 110 | /// parser errors (as it leads to more comprehensive |
| 111 | /// error messages), but it has a higher performance cost. |
| 112 | /// (hence, it's off by default) |
| 113 | /// |
| 114 | /// # Arguments |
| 115 | /// |
| 116 | /// * `enabled` - Whether to enable the collection for |
| 117 | /// more error details. |
| 118 | pub fn set_error_detail(enabled: bool) { |
| 119 | ERROR_DETAIL.store(val:enabled, order:Ordering::Relaxed); |
| 120 | } |
| 121 | |
| 122 | #[derive (Debug)] |
| 123 | struct CallLimitTracker { |
| 124 | current_call_limit: Option<(usize, usize)>, |
| 125 | } |
| 126 | |
| 127 | impl Default for CallLimitTracker { |
| 128 | fn default() -> Self { |
| 129 | let limit: usize = CALL_LIMIT.load(order:Ordering::Relaxed); |
| 130 | let current_call_limit: Option<(usize, usize)> = if limit > 0 { Some((0, limit)) } else { None }; |
| 131 | Self { current_call_limit } |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | impl CallLimitTracker { |
| 136 | fn limit_reached(&self) -> bool { |
| 137 | self.current_call_limit |
| 138 | .map_or(default:false, |(current: usize, limit: usize)| current >= limit) |
| 139 | } |
| 140 | |
| 141 | fn increment_depth(&mut self) { |
| 142 | if let Some((current: &mut usize, _)) = &mut self.current_call_limit { |
| 143 | *current += 1; |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | /// Number of call stacks that may result from a sequence of rules parsing. |
| 149 | const CALL_STACK_INITIAL_CAPACITY: usize = 20; |
| 150 | /// Max (un)expected number of tokens that we may see on the parsing error position. |
| 151 | const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30; |
| 152 | /// Max rule children number for which we'll extend calls stacks. |
| 153 | /// |
| 154 | /// In case rule we're working with has too many children rules that failed in parsing, |
| 155 | /// we don't want to store long stacks for all of them. If rule has more than this number |
| 156 | /// of failed children, they all will be collapsed in a parent rule. |
| 157 | const CALL_STACK_CHILDREN_THRESHOLD: usize = 4; |
| 158 | |
| 159 | /// Structure tracking errored parsing call (associated with specific `ParserState` function). |
| 160 | #[derive (Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)] |
| 161 | pub enum ParseAttempt<R> { |
| 162 | /// Call of `rule` errored. |
| 163 | Rule(R), |
| 164 | /// Call of token element (e.g., `match_string` or `match_insensitive`) errored. |
| 165 | /// Works as indicator of that leaf node is not a rule. In order to get the token value we |
| 166 | /// can address `ParseAttempts` `(un)expected_tokens`. |
| 167 | Token, |
| 168 | } |
| 169 | |
| 170 | impl<R> ParseAttempt<R> { |
| 171 | pub fn get_rule(&self) -> Option<&R> { |
| 172 | match self { |
| 173 | ParseAttempt::Rule(r: &R) => Some(r), |
| 174 | ParseAttempt::Token => None, |
| 175 | } |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | /// Rules call stack. |
| 180 | /// Contains sequence of rule calls that resulted in new parsing attempt. |
| 181 | #[derive (Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] |
| 182 | pub struct RulesCallStack<R> { |
| 183 | /// Deepest rule caused a parsing error (ParseAttempt::Token transformed into a rule). |
| 184 | pub deepest: ParseAttempt<R>, |
| 185 | /// Most top rule covering `deepest`. |
| 186 | pub parent: Option<R>, |
| 187 | } |
| 188 | |
| 189 | impl<R> RulesCallStack<R> { |
| 190 | fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> { |
| 191 | RulesCallStack { |
| 192 | deepest, |
| 193 | parent: None, |
| 194 | } |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | #[derive (Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] |
| 199 | pub enum ParsingToken { |
| 200 | Sensitive { token: String }, |
| 201 | Insensitive { token: String }, |
| 202 | Range { start: char, end: char }, |
| 203 | BuiltInRule, |
| 204 | } |
| 205 | |
| 206 | impl Display for ParsingToken { |
| 207 | fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { |
| 208 | match self { |
| 209 | ParsingToken::Sensitive { token: &String } => write!(f, " {token}" ), |
| 210 | ParsingToken::Insensitive { token: &String } => write!(f, " {}" , token.to_uppercase()), |
| 211 | ParsingToken::Range { start: &char, end: &char } => write!(f, " {start}.. {end}" ), |
| 212 | ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE" ), |
| 213 | } |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | /// Structure that tracks all the parsing attempts made on the max position. |
| 218 | /// We want to give an error hint about parsing rules that succeeded |
| 219 | /// at the farthest input position. |
| 220 | /// The intuition is such rules will be most likely the query user initially wanted to write. |
| 221 | #[derive (Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] |
| 222 | pub struct ParseAttempts<R> { |
| 223 | /// Indicates whether the parsing attempts are tracked. |
| 224 | enabled: bool, |
| 225 | /// Vec of rule calls sequences awaiting tokens at the same `max_position`. |
| 226 | /// If there are several stacks in vec, it means all those rule stacks are "equal" |
| 227 | /// because their attempts occurred on the same position. |
| 228 | pub call_stacks: Vec<RulesCallStack<R>>, |
| 229 | /// Tokens that could be putted at `max_position` |
| 230 | /// in order to get a valid grammar query. |
| 231 | expected_tokens: Vec<ParsingToken>, |
| 232 | /// Tokens that we've prohibited to be putted at `max_position` |
| 233 | /// in order to get a valid grammar query. |
| 234 | unexpected_tokens: Vec<ParsingToken>, |
| 235 | /// Max position at which we were expecting to see one of `expected_tokens`. |
| 236 | pub max_position: usize, |
| 237 | } |
| 238 | |
| 239 | impl<R: RuleType> ParseAttempts<R> { |
| 240 | /// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens` |
| 241 | /// initialized with capacity. |
| 242 | pub fn new() -> Self { |
| 243 | Self { |
| 244 | call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY), |
| 245 | expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY), |
| 246 | unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY), |
| 247 | max_position: 0, |
| 248 | enabled: ERROR_DETAIL.load(Ordering::Relaxed), |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | /// Get number of currently present call stacks. |
| 253 | fn call_stacks_number(&self) -> usize { |
| 254 | self.call_stacks.len() |
| 255 | } |
| 256 | |
| 257 | pub fn expected_tokens(&self) -> Vec<ParsingToken> { |
| 258 | self.expected_tokens |
| 259 | .iter() |
| 260 | .cloned() |
| 261 | .collect::<BTreeSet<_>>() |
| 262 | .into_iter() |
| 263 | .collect() |
| 264 | } |
| 265 | |
| 266 | pub fn unexpected_tokens(&self) -> Vec<ParsingToken> { |
| 267 | self.unexpected_tokens |
| 268 | .iter() |
| 269 | .cloned() |
| 270 | .collect::<BTreeSet<_>>() |
| 271 | .into_iter() |
| 272 | .collect() |
| 273 | } |
| 274 | |
| 275 | /// Retrieve call stacks. |
| 276 | pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> { |
| 277 | self.call_stacks |
| 278 | .iter() |
| 279 | .cloned() |
| 280 | .collect::<BTreeSet<_>>() |
| 281 | .into_iter() |
| 282 | .collect() |
| 283 | } |
| 284 | |
| 285 | /// In case we've tried to parse a rule, which start position is bigger than previous |
| 286 | /// `max_position` it means that we've advanced in our parsing and found better candidate. |
| 287 | /// |
| 288 | /// `start_index` is: |
| 289 | /// * Number of call stacks present in state at the moment current `rule` was called. The idea |
| 290 | /// is that we'd like to update only those stacks that originated from the current `rule` and |
| 291 | /// not from those that were called previously. |
| 292 | /// * 0 in case we've successfully parsed some token since the moment `rule` was called. |
| 293 | fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) { |
| 294 | let mut non_token_call_stacks = Vec::new(); |
| 295 | let mut token_call_stack_met = false; |
| 296 | for call_stack in self.call_stacks.iter().skip(start_index) { |
| 297 | if matches!(call_stack.deepest, ParseAttempt::Token) { |
| 298 | token_call_stack_met = true; |
| 299 | } else { |
| 300 | non_token_call_stacks.push(call_stack.clone()) |
| 301 | } |
| 302 | } |
| 303 | if token_call_stack_met && non_token_call_stacks.is_empty() { |
| 304 | // If `non_token_call_stacks` is not empty we wouldn't like to add a new standalone |
| 305 | // `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a |
| 306 | // rule) as soon as it doesn't give us any useful additional info. |
| 307 | non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token)); |
| 308 | } |
| 309 | self.call_stacks |
| 310 | .splice(start_index.., non_token_call_stacks); |
| 311 | |
| 312 | let children_number_over_threshold = |
| 313 | self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD; |
| 314 | if children_number_over_threshold { |
| 315 | self.call_stacks.truncate(start_index); |
| 316 | self.call_stacks |
| 317 | .push(RulesCallStack::new(ParseAttempt::Rule(rule))); |
| 318 | } else { |
| 319 | for call_stack in self.call_stacks.iter_mut().skip(start_index) { |
| 320 | if matches!(call_stack.deepest, ParseAttempt::Token) { |
| 321 | call_stack.deepest = ParseAttempt::Rule(rule); |
| 322 | } else { |
| 323 | call_stack.parent = Some(rule); |
| 324 | } |
| 325 | } |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | /// If `expected` flag is set to false, it means we've successfully parsed token being in the |
| 330 | /// state of negative lookahead and want to track `token` in the `unexpected_tokens`. Otherwise, |
| 331 | /// we want to track it the `expected_tokens`. Let's call chosen vec a `target_vec`. |
| 332 | /// |
| 333 | /// In case `position` is: |
| 334 | /// * Equal to `max_position`, add `token` to `target_vec`, |
| 335 | /// * Bigger than `max_position`, set `token` as the only new element of `target_vec`. |
| 336 | #[allow (clippy::comparison_chain)] |
| 337 | fn try_add_new_token( |
| 338 | &mut self, |
| 339 | token: ParsingToken, |
| 340 | start_position: usize, |
| 341 | position: usize, |
| 342 | negative_lookahead: bool, |
| 343 | ) { |
| 344 | let target_vec_push_token = |attempts: &mut ParseAttempts<R>| { |
| 345 | let target_vec = if negative_lookahead { |
| 346 | &mut attempts.unexpected_tokens |
| 347 | } else { |
| 348 | &mut attempts.expected_tokens |
| 349 | }; |
| 350 | target_vec.push(token); |
| 351 | }; |
| 352 | |
| 353 | if position > self.max_position { |
| 354 | if negative_lookahead && start_position > self.max_position { |
| 355 | // We encountered a sequence under negative lookahead. |
| 356 | // We would like to track only first failed token in this sequence (which |
| 357 | // `start_position` should be equal to `self.max_position`). |
| 358 | return; |
| 359 | } |
| 360 | target_vec_push_token(self); |
| 361 | |
| 362 | if negative_lookahead { |
| 363 | // In case of successful parsing of token under negative lookahead the only |
| 364 | // thing we'd like to do is to track the token in the `unexpected_tokens`. |
| 365 | return; |
| 366 | } |
| 367 | self.max_position = position; |
| 368 | self.expected_tokens.clear(); |
| 369 | self.unexpected_tokens.clear(); |
| 370 | self.call_stacks.clear(); |
| 371 | self.call_stacks |
| 372 | .push(RulesCallStack::new(ParseAttempt::Token)); |
| 373 | } else if position == self.max_position { |
| 374 | target_vec_push_token(self); |
| 375 | self.call_stacks |
| 376 | .push(RulesCallStack::new(ParseAttempt::Token)); |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | /// Reset state in case we've successfully parsed some token in |
| 381 | /// `match_string` or `match_insensetive`. |
| 382 | fn nullify_expected_tokens(&mut self, new_max_position: usize) { |
| 383 | self.call_stacks.clear(); |
| 384 | self.expected_tokens.clear(); |
| 385 | self.unexpected_tokens.clear(); |
| 386 | self.max_position = new_max_position; |
| 387 | } |
| 388 | } |
| 389 | |
| 390 | impl<R: RuleType> Default for ParseAttempts<R> { |
| 391 | fn default() -> Self { |
| 392 | Self::new() |
| 393 | } |
| 394 | } |
| 395 | |
| 396 | /// The complete state of a [`Parser`]. |
| 397 | /// |
| 398 | /// [`Parser`]: trait.Parser.html |
| 399 | #[derive (Debug)] |
| 400 | pub struct ParserState<'i, R: RuleType> { |
| 401 | /// Current position from which we try to apply some parser function. |
| 402 | /// Initially is 0. |
| 403 | /// E.g., we are parsing `create user 'Bobby'` query, we parsed "create" via `match_insensitive` |
| 404 | /// and switched our `position` from 0 to the length of "create". |
| 405 | /// |
| 406 | /// E.g., see `match_string` -> `self.position.match_string(string)` which updates `self.pos`. |
| 407 | position: Position<'i>, |
| 408 | /// Queue representing rules partially (`QueueableToken::Start`) and |
| 409 | /// totally (`QueueableToken::End`) parsed. When entering rule we put it in the queue in a state |
| 410 | /// of `Start` and after all its sublogic (subrules or strings) are parsed, we change it to |
| 411 | /// `End` state. |
| 412 | queue: Vec<QueueableToken<'i, R>>, |
| 413 | /// Status set in case specific lookahead logic is used in grammar. |
| 414 | /// See `Lookahead` for more information. |
| 415 | lookahead: Lookahead, |
| 416 | /// Rules that we HAVE expected, tried to parse, but failed. |
| 417 | pos_attempts: Vec<R>, |
| 418 | /// Rules that we have NOT expected, tried to parse, but failed. |
| 419 | neg_attempts: Vec<R>, |
| 420 | /// Max position in the query from which we've tried to parse some rule but failed. |
| 421 | attempt_pos: usize, |
| 422 | /// Current atomicity status. For more information see `Atomicity`. |
| 423 | atomicity: Atomicity, |
| 424 | /// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP |
| 425 | /// invocations). |
| 426 | stack: Stack<Span<'i>>, |
| 427 | /// Used for setting max parser calls limit. |
| 428 | call_tracker: CallLimitTracker, |
| 429 | /// Together with tracking of `pos_attempts` and `attempt_pos` |
| 430 | /// as a pair of (list of rules that we've tried to parse but failed, max parsed position) |
| 431 | /// we track those rules (which we've tried to parse at the same max pos) at this helper struct. |
| 432 | /// |
| 433 | /// Note, that we may try to parse several rules on different positions. We want to track only |
| 434 | /// those rules, which attempt position is bigger, because we consider that it's nearer to the |
| 435 | /// query that user really wanted to pass. |
| 436 | /// |
| 437 | /// E.g. we have a query `create user "Bobby"` and two root rules: |
| 438 | /// * CreateUser = { "create" ~ "user" ~ Name } |
| 439 | /// * CreateTable = { "create" ~ "table" ~ Name } |
| 440 | /// * Name = { SOME_DEFINITION } |
| 441 | /// |
| 442 | /// While parsing the query we'll update tracker position to the start of "Bobby", because we'd |
| 443 | /// successfully parse "create" + "user" (and not "table"). |
| 444 | parse_attempts: ParseAttempts<R>, |
| 445 | } |
| 446 | |
| 447 | /// Creates a `ParserState` from a `&str`, supplying it to a closure `f`. |
| 448 | /// |
| 449 | /// # Examples |
| 450 | /// |
| 451 | /// ``` |
| 452 | /// # use pest; |
| 453 | /// let input = "" ; |
| 454 | /// pest::state::<(), _>(input, |s| Ok(s)).unwrap(); |
| 455 | /// ``` |
| 456 | #[allow (clippy::perf)] |
| 457 | pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>> |
| 458 | where |
| 459 | F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>, |
| 460 | { |
| 461 | let state = ParserState::new(input); |
| 462 | |
| 463 | match f(state) { |
| 464 | Ok(state) => { |
| 465 | let len = state.queue.len(); |
| 466 | Ok(new(Rc::new(state.queue), input, None, 0, len)) |
| 467 | } |
| 468 | Err(mut state) => { |
| 469 | let variant = if state.reached_call_limit() { |
| 470 | ErrorVariant::CustomError { |
| 471 | message: "call limit reached" .to_owned(), |
| 472 | } |
| 473 | } else { |
| 474 | state.pos_attempts.sort(); |
| 475 | state.pos_attempts.dedup(); |
| 476 | state.neg_attempts.sort(); |
| 477 | state.neg_attempts.dedup(); |
| 478 | ErrorVariant::ParsingError { |
| 479 | positives: state.pos_attempts.clone(), |
| 480 | negatives: state.neg_attempts.clone(), |
| 481 | } |
| 482 | }; |
| 483 | |
| 484 | if state.parse_attempts.enabled { |
| 485 | Err(Error::new_from_pos_with_parsing_attempts( |
| 486 | variant, |
| 487 | Position::new_internal(input, state.attempt_pos), |
| 488 | state.parse_attempts.clone(), |
| 489 | )) |
| 490 | } else { |
| 491 | Err(Error::new_from_pos( |
| 492 | variant, |
| 493 | Position::new_internal(input, state.attempt_pos), |
| 494 | )) |
| 495 | } |
| 496 | } |
| 497 | } |
| 498 | } |
| 499 | |
| 500 | impl<'i, R: RuleType> ParserState<'i, R> { |
| 501 | /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box` |
| 502 | /// will be passed from closure to closure based on the needs of the specified `Parser`. |
| 503 | /// |
| 504 | /// # Examples |
| 505 | /// |
| 506 | /// ``` |
| 507 | /// # use pest; |
| 508 | /// let input = "" ; |
| 509 | /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input); |
| 510 | /// ``` |
| 511 | pub fn new(input: &'i str) -> Box<Self> { |
| 512 | Box::new(ParserState { |
| 513 | position: Position::from_start(input), |
| 514 | queue: vec![], |
| 515 | lookahead: Lookahead::None, |
| 516 | pos_attempts: vec![], |
| 517 | neg_attempts: vec![], |
| 518 | attempt_pos: 0, |
| 519 | atomicity: Atomicity::NonAtomic, |
| 520 | stack: Stack::new(), |
| 521 | call_tracker: Default::default(), |
| 522 | parse_attempts: ParseAttempts::new(), |
| 523 | }) |
| 524 | } |
| 525 | |
| 526 | /// Get all parse attempts after process of parsing is finished. |
| 527 | pub fn get_parse_attempts(&self) -> &ParseAttempts<R> { |
| 528 | &self.parse_attempts |
| 529 | } |
| 530 | |
| 531 | /// Returns a reference to the current `Position` of the `ParserState`. |
| 532 | /// |
| 533 | /// # Examples |
| 534 | /// |
| 535 | /// ``` |
| 536 | /// # use pest; |
| 537 | /// # #[allow (non_camel_case_types)] |
| 538 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 539 | /// enum Rule { |
| 540 | /// ab |
| 541 | /// } |
| 542 | /// |
| 543 | /// let input = "ab" ; |
| 544 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 545 | /// let position = state.position(); |
| 546 | /// assert_eq!(position.pos(), 0); |
| 547 | /// ``` |
| 548 | pub fn position(&self) -> &Position<'i> { |
| 549 | &self.position |
| 550 | } |
| 551 | |
| 552 | /// Returns the current atomicity of the `ParserState`. |
| 553 | /// |
| 554 | /// # Examples |
| 555 | /// |
| 556 | /// ``` |
| 557 | /// # use pest; |
| 558 | /// # use pest::Atomicity; |
| 559 | /// # #[allow (non_camel_case_types)] |
| 560 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 561 | /// enum Rule { |
| 562 | /// ab |
| 563 | /// } |
| 564 | /// |
| 565 | /// let input = "ab" ; |
| 566 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 567 | /// let atomicity = state.atomicity(); |
| 568 | /// assert_eq!(atomicity, Atomicity::NonAtomic); |
| 569 | /// ``` |
| 570 | pub fn atomicity(&self) -> Atomicity { |
| 571 | self.atomicity |
| 572 | } |
| 573 | |
| 574 | #[inline ] |
| 575 | fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> { |
| 576 | if self.call_tracker.limit_reached() { |
| 577 | return Err(self); |
| 578 | } |
| 579 | self.call_tracker.increment_depth(); |
| 580 | Ok(self) |
| 581 | } |
| 582 | |
| 583 | #[inline ] |
| 584 | fn reached_call_limit(&self) -> bool { |
| 585 | self.call_tracker.limit_reached() |
| 586 | } |
| 587 | |
| 588 | /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure |
| 589 | /// meant to match the rule. |
| 590 | /// |
| 591 | /// # Examples |
| 592 | /// |
| 593 | /// ``` |
| 594 | /// # use pest; |
| 595 | /// # #[allow (non_camel_case_types)] |
| 596 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 597 | /// enum Rule { |
| 598 | /// a |
| 599 | /// } |
| 600 | /// |
| 601 | /// let input = "a" ; |
| 602 | /// let pairs: Vec<_> = pest::state(input, |state| { |
| 603 | /// state.rule(Rule::a, |s| Ok(s)) |
| 604 | /// }).unwrap().collect(); |
| 605 | /// |
| 606 | /// assert_eq!(pairs.len(), 1); |
| 607 | /// ``` |
| 608 | #[inline ] |
| 609 | pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>> |
| 610 | where |
| 611 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
| 612 | { |
| 613 | self = self.inc_call_check_limit()?; |
| 614 | // Position from which this `rule` starts parsing. |
| 615 | let actual_pos = self.position.pos(); |
| 616 | // Remember index of the `self.queue` element that will be associated with this `rule`. |
| 617 | let index = self.queue.len(); |
| 618 | |
| 619 | let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos { |
| 620 | (self.pos_attempts.len(), self.neg_attempts.len()) |
| 621 | } else { |
| 622 | // Attempts have not been cleared yet since the attempt_pos is older. |
| 623 | (0, 0) |
| 624 | }; |
| 625 | |
| 626 | if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic { |
| 627 | // Pair's position will only be known after running the closure. |
| 628 | self.queue.push(QueueableToken::Start { |
| 629 | end_token_index: 0, |
| 630 | input_pos: actual_pos, |
| 631 | }); |
| 632 | } |
| 633 | |
| 634 | // Remember attempts number before `f` call. |
| 635 | // In `track` using this variable we can say, how many attempts were added |
| 636 | // during children rules traversal. |
| 637 | let attempts = self.attempts_at(actual_pos); |
| 638 | // Number of call stacks present in `self.parse_attempts` before `f` call. |
| 639 | // We need to remember this number only in case there wasn't found any farther attempt. |
| 640 | // E.g. we are handling rule, on start position of which may be tested two |
| 641 | // children rules. At the moment we'll return from `f` call below, |
| 642 | // there will be two more children rules in `self.parse_attempts` that we'll |
| 643 | // consider to be the children of current `rule`. |
| 644 | let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number(); |
| 645 | // Max parsing attempt position at the moment of `rule` handling. |
| 646 | // It case it's raised during children rules handling, it means |
| 647 | // we've made a parsing progress. |
| 648 | let remember_max_position = self.parse_attempts.max_position; |
| 649 | |
| 650 | let result = f(self); |
| 651 | |
| 652 | let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| { |
| 653 | if new_state.parse_attempts.max_position > remember_max_position { |
| 654 | // It means that one of `match_string` or e.g. `match_insensetive` function calls |
| 655 | // have already erased `self.parse_attempts.call_stacks` and that previously |
| 656 | // remembered values are not valid anymore. |
| 657 | remember_call_stacks_number = 0; |
| 658 | } |
| 659 | if !matches!(new_state.atomicity, Atomicity::Atomic) { |
| 660 | new_state |
| 661 | .parse_attempts |
| 662 | .try_add_new_stack_rule(rule, remember_call_stacks_number); |
| 663 | } |
| 664 | }; |
| 665 | |
| 666 | match result { |
| 667 | Ok(mut new_state) => { |
| 668 | if new_state.lookahead == Lookahead::Negative { |
| 669 | new_state.track( |
| 670 | rule, |
| 671 | actual_pos, |
| 672 | pos_attempts_index, |
| 673 | neg_attempts_index, |
| 674 | attempts, |
| 675 | ); |
| 676 | } |
| 677 | |
| 678 | if new_state.lookahead == Lookahead::None |
| 679 | && new_state.atomicity != Atomicity::Atomic |
| 680 | { |
| 681 | // Index of `QueueableToken::End` token added below |
| 682 | // that corresponds to previously added `QueueableToken::Start` token. |
| 683 | let new_index = new_state.queue.len(); |
| 684 | match new_state.queue[index] { |
| 685 | QueueableToken::Start { |
| 686 | ref mut end_token_index, |
| 687 | .. |
| 688 | } => *end_token_index = new_index, |
| 689 | _ => unreachable!(), |
| 690 | }; |
| 691 | |
| 692 | let new_pos = new_state.position.pos(); |
| 693 | |
| 694 | new_state.queue.push(QueueableToken::End { |
| 695 | start_token_index: index, |
| 696 | rule, |
| 697 | tag: None, |
| 698 | input_pos: new_pos, |
| 699 | }); |
| 700 | } |
| 701 | |
| 702 | // Note, that we need to count positive parsing results too, because we can fail in |
| 703 | // optional rule call inside which may lie the farthest |
| 704 | // parsed token. |
| 705 | if new_state.parse_attempts.enabled { |
| 706 | try_add_rule_to_stack(&mut new_state); |
| 707 | } |
| 708 | Ok(new_state) |
| 709 | } |
| 710 | Err(mut new_state) => { |
| 711 | if new_state.lookahead != Lookahead::Negative { |
| 712 | new_state.track( |
| 713 | rule, |
| 714 | actual_pos, |
| 715 | pos_attempts_index, |
| 716 | neg_attempts_index, |
| 717 | attempts, |
| 718 | ); |
| 719 | if new_state.parse_attempts.enabled { |
| 720 | try_add_rule_to_stack(&mut new_state); |
| 721 | } |
| 722 | } |
| 723 | |
| 724 | if new_state.lookahead == Lookahead::None |
| 725 | && new_state.atomicity != Atomicity::Atomic |
| 726 | { |
| 727 | new_state.queue.truncate(index); |
| 728 | } |
| 729 | |
| 730 | Err(new_state) |
| 731 | } |
| 732 | } |
| 733 | } |
| 734 | |
| 735 | /// Tag current node |
| 736 | /// |
| 737 | /// # Examples |
| 738 | /// |
| 739 | /// Try to recognize the one specified in a set of characters |
| 740 | /// |
| 741 | /// ``` |
| 742 | /// use pest::{state, ParseResult, ParserState, iterators::Pair}; |
| 743 | /// #[allow(non_camel_case_types)] |
| 744 | /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 745 | /// enum Rule { |
| 746 | /// character, |
| 747 | /// } |
| 748 | /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> { |
| 749 | /// state.sequence(|state| { |
| 750 | /// character(state) |
| 751 | /// .and_then(|state| character(state)) |
| 752 | /// .and_then(|state| character(state)) |
| 753 | /// .and_then(|state| state.tag_node("c" )) |
| 754 | /// .and_then(|state| character(state)) |
| 755 | /// }) |
| 756 | /// } |
| 757 | /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> { |
| 758 | /// state.rule(Rule::character, |state| state.match_range('a' ..'z' )) |
| 759 | /// } |
| 760 | /// |
| 761 | /// let input = "abcd" ; |
| 762 | /// let pairs = state(input, mark_c).unwrap(); |
| 763 | /// // find all node tag as `c` |
| 764 | /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c" )).collect(); |
| 765 | /// assert_eq!(find[0].as_str(), "c" ) |
| 766 | /// ``` |
| 767 | #[inline ] |
| 768 | pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> { |
| 769 | if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() { |
| 770 | *old = Some(tag) |
| 771 | } |
| 772 | Ok(self) |
| 773 | } |
| 774 | |
| 775 | /// Get number of allowed rules attempts + prohibited rules attempts. |
| 776 | fn attempts_at(&self, pos: usize) -> usize { |
| 777 | if self.attempt_pos == pos { |
| 778 | self.pos_attempts.len() + self.neg_attempts.len() |
| 779 | } else { |
| 780 | 0 |
| 781 | } |
| 782 | } |
| 783 | |
| 784 | fn track( |
| 785 | &mut self, |
| 786 | rule: R, |
| 787 | pos: usize, |
| 788 | pos_attempts_index: usize, |
| 789 | neg_attempts_index: usize, |
| 790 | prev_attempts: usize, |
| 791 | ) { |
| 792 | if self.atomicity == Atomicity::Atomic { |
| 793 | return; |
| 794 | } |
| 795 | |
| 796 | // If nested rules made no progress, there is no use to report them; it's only useful to |
| 797 | // track the current rule, the exception being when only one attempt has been made during |
| 798 | // the children rules. |
| 799 | let curr_attempts = self.attempts_at(pos); |
| 800 | if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 { |
| 801 | return; |
| 802 | } |
| 803 | |
| 804 | if pos == self.attempt_pos { |
| 805 | self.pos_attempts.truncate(pos_attempts_index); |
| 806 | self.neg_attempts.truncate(neg_attempts_index); |
| 807 | } |
| 808 | |
| 809 | if pos > self.attempt_pos { |
| 810 | self.pos_attempts.clear(); |
| 811 | self.neg_attempts.clear(); |
| 812 | self.attempt_pos = pos; |
| 813 | } |
| 814 | |
| 815 | let attempts = if self.lookahead != Lookahead::Negative { |
| 816 | &mut self.pos_attempts |
| 817 | } else { |
| 818 | &mut self.neg_attempts |
| 819 | }; |
| 820 | |
| 821 | if pos == self.attempt_pos { |
| 822 | attempts.push(rule); |
| 823 | } |
| 824 | } |
| 825 | |
| 826 | /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns |
| 827 | /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current |
| 828 | /// `Box<ParserState>` otherwise. |
| 829 | /// |
| 830 | /// This method is useful to parse sequences that only match together which usually come in the |
| 831 | /// form of chained `Result`s with |
| 832 | /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then). |
| 833 | /// |
| 834 | /// |
| 835 | /// # Examples |
| 836 | /// |
| 837 | /// ``` |
| 838 | /// # use pest; |
| 839 | /// # #[allow (non_camel_case_types)] |
| 840 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 841 | /// enum Rule { |
| 842 | /// a |
| 843 | /// } |
| 844 | /// |
| 845 | /// let input = "a" ; |
| 846 | /// let pairs: Vec<_> = pest::state(input, |state| { |
| 847 | /// state.sequence(|s| { |
| 848 | /// s.rule(Rule::a, |s| Ok(s)).and_then(|s| { |
| 849 | /// s.match_string("b" ) |
| 850 | /// }) |
| 851 | /// }).or_else(|s| { |
| 852 | /// Ok(s) |
| 853 | /// }) |
| 854 | /// }).unwrap().collect(); |
| 855 | /// |
| 856 | /// assert_eq!(pairs.len(), 0); |
| 857 | /// ``` |
| 858 | #[inline ] |
| 859 | pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
| 860 | where |
| 861 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
| 862 | { |
| 863 | self = self.inc_call_check_limit()?; |
| 864 | let token_index = self.queue.len(); |
| 865 | let initial_pos = self.position; |
| 866 | |
| 867 | let result = f(self); |
| 868 | |
| 869 | match result { |
| 870 | Ok(new_state) => Ok(new_state), |
| 871 | Err(mut new_state) => { |
| 872 | // Restore the initial position and truncate the token queue. |
| 873 | new_state.position = initial_pos; |
| 874 | new_state.queue.truncate(token_index); |
| 875 | Err(new_state) |
| 876 | } |
| 877 | } |
| 878 | } |
| 879 | |
| 880 | /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns |
| 881 | /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`. |
| 882 | /// |
| 883 | /// # Examples |
| 884 | /// |
| 885 | /// ``` |
| 886 | /// # use pest; |
| 887 | /// # #[allow (non_camel_case_types)] |
| 888 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 889 | /// enum Rule { |
| 890 | /// ab |
| 891 | /// } |
| 892 | /// |
| 893 | /// let input = "aab" ; |
| 894 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 895 | /// let mut result = state.repeat(|s| { |
| 896 | /// s.match_string("a" ) |
| 897 | /// }); |
| 898 | /// assert!(result.is_ok()); |
| 899 | /// assert_eq!(result.unwrap().position().pos(), 2); |
| 900 | /// |
| 901 | /// state = pest::ParserState::new(input); |
| 902 | /// result = state.repeat(|s| { |
| 903 | /// s.match_string("b" ) |
| 904 | /// }); |
| 905 | /// assert!(result.is_ok()); |
| 906 | /// assert_eq!(result.unwrap().position().pos(), 0); |
| 907 | /// ``` |
| 908 | #[inline ] |
| 909 | pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>> |
| 910 | where |
| 911 | F: FnMut(Box<Self>) -> ParseResult<Box<Self>>, |
| 912 | { |
| 913 | self = self.inc_call_check_limit()?; |
| 914 | let mut result = f(self); |
| 915 | |
| 916 | loop { |
| 917 | match result { |
| 918 | Ok(state) => result = f(state), |
| 919 | Err(state) => return Ok(state), |
| 920 | }; |
| 921 | } |
| 922 | } |
| 923 | |
| 924 | /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns |
| 925 | /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`. |
| 926 | /// |
| 927 | /// # Examples |
| 928 | /// |
| 929 | /// ``` |
| 930 | /// # use pest; |
| 931 | /// # #[allow (non_camel_case_types)] |
| 932 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 933 | /// enum Rule { |
| 934 | /// ab |
| 935 | /// } |
| 936 | /// |
| 937 | /// let input = "ab" ; |
| 938 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 939 | /// let result = state.optional(|s| { |
| 940 | /// s.match_string("ab" ) |
| 941 | /// }); |
| 942 | /// assert!(result.is_ok()); |
| 943 | /// |
| 944 | /// state = pest::ParserState::new(input); |
| 945 | /// let result = state.optional(|s| { |
| 946 | /// s.match_string("ac" ) |
| 947 | /// }); |
| 948 | /// assert!(result.is_ok()); |
| 949 | /// ``` |
| 950 | #[inline ] |
| 951 | pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
| 952 | where |
| 953 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
| 954 | { |
| 955 | self = self.inc_call_check_limit()?; |
| 956 | match f(self) { |
| 957 | Ok(state) | Err(state) => Ok(state), |
| 958 | } |
| 959 | } |
| 960 | |
| 961 | /// Generic function to handle result of char/string/range parsing |
| 962 | /// in order to track (un)expected tokens. |
| 963 | fn handle_token_parse_result( |
| 964 | &mut self, |
| 965 | start_position: usize, |
| 966 | token: ParsingToken, |
| 967 | parse_succeeded: bool, |
| 968 | ) { |
| 969 | // New position after tracked parsed element for case of `parse_succeded` is true. |
| 970 | // Position of parsing failure otherwise. |
| 971 | let current_pos = self.position.pos(); |
| 972 | |
| 973 | if parse_succeeded { |
| 974 | if self.lookahead == Lookahead::Negative { |
| 975 | self.parse_attempts |
| 976 | .try_add_new_token(token, start_position, current_pos, true); |
| 977 | } else if current_pos > self.parse_attempts.max_position { |
| 978 | self.parse_attempts.nullify_expected_tokens(current_pos); |
| 979 | } |
| 980 | } else if self.lookahead != Lookahead::Negative { |
| 981 | self.parse_attempts |
| 982 | .try_add_new_token(token, start_position, current_pos, false); |
| 983 | } |
| 984 | } |
| 985 | |
| 986 | /// Attempts to match a single character based on a filter function. Returns `Ok` with the |
| 987 | /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` |
| 988 | /// otherwise. |
| 989 | /// |
| 990 | /// # Examples |
| 991 | /// |
| 992 | /// ``` |
| 993 | /// # use pest; |
| 994 | /// # #[allow (non_camel_case_types)] |
| 995 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 996 | /// enum Rule {} |
| 997 | /// |
| 998 | /// let input = "ab" ; |
| 999 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1000 | /// let result = state.match_char_by(|c| c.is_ascii()); |
| 1001 | /// assert!(result.is_ok()); |
| 1002 | /// assert_eq!(result.unwrap().position().pos(), 1); |
| 1003 | /// |
| 1004 | /// let input = "❤" ; |
| 1005 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1006 | /// let result = state.match_char_by(|c| c.is_ascii()); |
| 1007 | /// assert!(result.is_err()); |
| 1008 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
| 1009 | /// ``` |
| 1010 | #[inline ] |
| 1011 | pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
| 1012 | where |
| 1013 | F: FnOnce(char) -> bool, |
| 1014 | { |
| 1015 | let start_position = self.position.pos(); |
| 1016 | let succeeded = self.position.match_char_by(f); |
| 1017 | if self.parse_attempts.enabled { |
| 1018 | let token = ParsingToken::BuiltInRule; |
| 1019 | self.handle_token_parse_result(start_position, token, succeeded); |
| 1020 | } |
| 1021 | if succeeded { |
| 1022 | Ok(self) |
| 1023 | } else { |
| 1024 | Err(self) |
| 1025 | } |
| 1026 | } |
| 1027 | |
| 1028 | /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if |
| 1029 | /// successful, or `Err` with the updated `Box<ParserState>` otherwise. |
| 1030 | /// |
| 1031 | /// # Examples |
| 1032 | /// |
| 1033 | /// ``` |
| 1034 | /// # use pest; |
| 1035 | /// # #[allow (non_camel_case_types)] |
| 1036 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1037 | /// enum Rule {} |
| 1038 | /// |
| 1039 | /// let input = "ab" ; |
| 1040 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1041 | /// let mut result = state.match_string("ab" ); |
| 1042 | /// assert!(result.is_ok()); |
| 1043 | /// assert_eq!(result.unwrap().position().pos(), 2); |
| 1044 | /// |
| 1045 | /// state = pest::ParserState::new(input); |
| 1046 | /// result = state.match_string("ac" ); |
| 1047 | /// assert!(result.is_err()); |
| 1048 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
| 1049 | /// ``` |
| 1050 | #[inline ] |
| 1051 | pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> { |
| 1052 | let start_position = self.position.pos(); |
| 1053 | let succeeded = self.position.match_string(string); |
| 1054 | if self.parse_attempts.enabled { |
| 1055 | let token = ParsingToken::Sensitive { |
| 1056 | token: String::from(string), |
| 1057 | }; |
| 1058 | self.handle_token_parse_result(start_position, token, succeeded); |
| 1059 | } |
| 1060 | if succeeded { |
| 1061 | Ok(self) |
| 1062 | } else { |
| 1063 | Err(self) |
| 1064 | } |
| 1065 | } |
| 1066 | |
| 1067 | /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated |
| 1068 | /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise. |
| 1069 | /// |
| 1070 | /// # Examples |
| 1071 | /// |
| 1072 | /// ``` |
| 1073 | /// # use pest; |
| 1074 | /// # #[allow (non_camel_case_types)] |
| 1075 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1076 | /// enum Rule {} |
| 1077 | /// |
| 1078 | /// let input = "ab" ; |
| 1079 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1080 | /// let mut result = state.match_insensitive("AB" ); |
| 1081 | /// assert!(result.is_ok()); |
| 1082 | /// assert_eq!(result.unwrap().position().pos(), 2); |
| 1083 | /// |
| 1084 | /// state = pest::ParserState::new(input); |
| 1085 | /// result = state.match_insensitive("AC" ); |
| 1086 | /// assert!(result.is_err()); |
| 1087 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
| 1088 | /// ``` |
| 1089 | #[inline ] |
| 1090 | pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> { |
| 1091 | let start_position: usize = self.position().pos(); |
| 1092 | let succeeded = self.position.match_insensitive(string); |
| 1093 | if self.parse_attempts.enabled { |
| 1094 | let token = ParsingToken::Insensitive { |
| 1095 | token: String::from(string), |
| 1096 | }; |
| 1097 | self.handle_token_parse_result(start_position, token, succeeded); |
| 1098 | } |
| 1099 | if succeeded { |
| 1100 | Ok(self) |
| 1101 | } else { |
| 1102 | Err(self) |
| 1103 | } |
| 1104 | } |
| 1105 | |
| 1106 | /// Attempts to match a single character from the given range. Returns `Ok` with the updated |
| 1107 | /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise. |
| 1108 | /// |
| 1109 | /// # Caution |
| 1110 | /// The provided `range` is interpreted as inclusive. |
| 1111 | /// |
| 1112 | /// # Examples |
| 1113 | /// |
| 1114 | /// ``` |
| 1115 | /// # use pest; |
| 1116 | /// # #[allow (non_camel_case_types)] |
| 1117 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1118 | /// enum Rule {} |
| 1119 | /// |
| 1120 | /// let input = "ab" ; |
| 1121 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1122 | /// let mut result = state.match_range('a' ..'z' ); |
| 1123 | /// assert!(result.is_ok()); |
| 1124 | /// assert_eq!(result.unwrap().position().pos(), 1); |
| 1125 | /// |
| 1126 | /// state = pest::ParserState::new(input); |
| 1127 | /// result = state.match_range('A' ..'Z' ); |
| 1128 | /// assert!(result.is_err()); |
| 1129 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
| 1130 | /// ``` |
| 1131 | #[inline ] |
| 1132 | pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> { |
| 1133 | let start_position = self.position().pos(); |
| 1134 | let token = ParsingToken::Range { |
| 1135 | start: range.start, |
| 1136 | end: range.end, |
| 1137 | }; |
| 1138 | let succeeded = self.position.match_range(range); |
| 1139 | if self.parse_attempts.enabled { |
| 1140 | self.handle_token_parse_result(start_position, token, succeeded); |
| 1141 | } |
| 1142 | if succeeded { |
| 1143 | Ok(self) |
| 1144 | } else { |
| 1145 | Err(self) |
| 1146 | } |
| 1147 | } |
| 1148 | |
| 1149 | /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>` |
| 1150 | /// if successful, or `Err` with the updated `Box<ParserState>` otherwise. |
| 1151 | /// |
| 1152 | /// # Examples |
| 1153 | /// |
| 1154 | /// ``` |
| 1155 | /// # use pest; |
| 1156 | /// # #[allow (non_camel_case_types)] |
| 1157 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1158 | /// enum Rule {} |
| 1159 | /// |
| 1160 | /// let input = "ab" ; |
| 1161 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1162 | /// let mut result = state.skip(1); |
| 1163 | /// assert!(result.is_ok()); |
| 1164 | /// assert_eq!(result.unwrap().position().pos(), 1); |
| 1165 | /// |
| 1166 | /// state = pest::ParserState::new(input); |
| 1167 | /// result = state.skip(3); |
| 1168 | /// assert!(result.is_err()); |
| 1169 | /// assert_eq!(result.unwrap_err().position().pos(), 0); |
| 1170 | /// ``` |
| 1171 | #[inline ] |
| 1172 | pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> { |
| 1173 | if self.position.skip(n) { |
| 1174 | Ok(self) |
| 1175 | } else { |
| 1176 | Err(self) |
| 1177 | } |
| 1178 | } |
| 1179 | |
| 1180 | /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the |
| 1181 | /// updated `Box<ParserState>` whether or not one of the strings is found. |
| 1182 | /// |
| 1183 | /// # Examples |
| 1184 | /// |
| 1185 | /// ``` |
| 1186 | /// # use pest; |
| 1187 | /// # #[allow (non_camel_case_types)] |
| 1188 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1189 | /// enum Rule {} |
| 1190 | /// |
| 1191 | /// let input = "abcd" ; |
| 1192 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1193 | /// let mut result = state.skip_until(&["c" , "d" ]); |
| 1194 | /// assert!(result.is_ok()); |
| 1195 | /// assert_eq!(result.unwrap().position().pos(), 2); |
| 1196 | /// ``` |
| 1197 | #[inline ] |
| 1198 | pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> { |
| 1199 | self.position.skip_until(strings); |
| 1200 | Ok(self) |
| 1201 | } |
| 1202 | |
| 1203 | /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>` |
| 1204 | /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise. |
| 1205 | /// |
| 1206 | /// # Examples |
| 1207 | /// |
| 1208 | /// ``` |
| 1209 | /// # use pest; |
| 1210 | /// # #[allow (non_camel_case_types)] |
| 1211 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1212 | /// enum Rule {} |
| 1213 | /// |
| 1214 | /// let input = "ab" ; |
| 1215 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1216 | /// let mut result = state.start_of_input(); |
| 1217 | /// assert!(result.is_ok()); |
| 1218 | /// |
| 1219 | /// state = pest::ParserState::new(input); |
| 1220 | /// state = state.match_string("ab" ).unwrap(); |
| 1221 | /// result = state.start_of_input(); |
| 1222 | /// assert!(result.is_err()); |
| 1223 | /// ``` |
| 1224 | #[inline ] |
| 1225 | pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> { |
| 1226 | if self.position.at_start() { |
| 1227 | Ok(self) |
| 1228 | } else { |
| 1229 | Err(self) |
| 1230 | } |
| 1231 | } |
| 1232 | |
| 1233 | /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if |
| 1234 | /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise. |
| 1235 | /// |
| 1236 | /// # Examples |
| 1237 | /// |
| 1238 | /// ``` |
| 1239 | /// # use pest; |
| 1240 | /// # #[allow (non_camel_case_types)] |
| 1241 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1242 | /// enum Rule {} |
| 1243 | /// |
| 1244 | /// let input = "ab" ; |
| 1245 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1246 | /// let mut result = state.end_of_input(); |
| 1247 | /// assert!(result.is_err()); |
| 1248 | /// |
| 1249 | /// state = pest::ParserState::new(input); |
| 1250 | /// state = state.match_string("ab" ).unwrap(); |
| 1251 | /// result = state.end_of_input(); |
| 1252 | /// assert!(result.is_ok()); |
| 1253 | /// ``` |
| 1254 | #[inline ] |
| 1255 | pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> { |
| 1256 | if self.position.at_end() { |
| 1257 | Ok(self) |
| 1258 | } else { |
| 1259 | Err(self) |
| 1260 | } |
| 1261 | } |
| 1262 | |
| 1263 | /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns |
| 1264 | /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current |
| 1265 | /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err` |
| 1266 | /// together, negating the `Result`. |
| 1267 | /// |
| 1268 | /// # Examples |
| 1269 | /// |
| 1270 | /// ``` |
| 1271 | /// # use pest; |
| 1272 | /// # #[allow (non_camel_case_types)] |
| 1273 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1274 | /// enum Rule { |
| 1275 | /// a |
| 1276 | /// } |
| 1277 | /// |
| 1278 | /// let input = "a" ; |
| 1279 | /// let pairs: Vec<_> = pest::state(input, |state| { |
| 1280 | /// state.lookahead(true, |state| { |
| 1281 | /// state.rule(Rule::a, |s| Ok(s)) |
| 1282 | /// }) |
| 1283 | /// }).unwrap().collect(); |
| 1284 | /// |
| 1285 | /// assert_eq!(pairs.len(), 0); |
| 1286 | /// ``` |
| 1287 | #[inline ] |
| 1288 | pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>> |
| 1289 | where |
| 1290 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
| 1291 | { |
| 1292 | self = self.inc_call_check_limit()?; |
| 1293 | let initial_lookahead = self.lookahead; |
| 1294 | |
| 1295 | self.lookahead = if is_positive { |
| 1296 | match initial_lookahead { |
| 1297 | Lookahead::None | Lookahead::Positive => Lookahead::Positive, |
| 1298 | Lookahead::Negative => Lookahead::Negative, |
| 1299 | } |
| 1300 | } else { |
| 1301 | match initial_lookahead { |
| 1302 | Lookahead::None | Lookahead::Positive => Lookahead::Negative, |
| 1303 | Lookahead::Negative => Lookahead::Positive, |
| 1304 | } |
| 1305 | }; |
| 1306 | |
| 1307 | let initial_pos = self.position; |
| 1308 | |
| 1309 | let result = f(self.checkpoint()); |
| 1310 | |
| 1311 | let result_state = match result { |
| 1312 | Ok(mut new_state) => { |
| 1313 | new_state.position = initial_pos; |
| 1314 | new_state.lookahead = initial_lookahead; |
| 1315 | Ok(new_state.restore()) |
| 1316 | } |
| 1317 | Err(mut new_state) => { |
| 1318 | new_state.position = initial_pos; |
| 1319 | new_state.lookahead = initial_lookahead; |
| 1320 | Err(new_state.restore()) |
| 1321 | } |
| 1322 | }; |
| 1323 | |
| 1324 | if is_positive { |
| 1325 | result_state |
| 1326 | } else { |
| 1327 | match result_state { |
| 1328 | Ok(state) => Err(state), |
| 1329 | Err(state) => Ok(state), |
| 1330 | } |
| 1331 | } |
| 1332 | } |
| 1333 | |
| 1334 | /// Transformation which stops `Token`s from being generated according to `is_atomic`. |
| 1335 | /// Used as wrapper over `rule` (or even another `atomic`) call. |
| 1336 | /// |
| 1337 | /// # Examples |
| 1338 | /// |
| 1339 | /// ``` |
| 1340 | /// # use pest::{self, Atomicity}; |
| 1341 | /// # #[allow (non_camel_case_types)] |
| 1342 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1343 | /// enum Rule { |
| 1344 | /// a |
| 1345 | /// } |
| 1346 | /// |
| 1347 | /// let input = "a" ; |
| 1348 | /// let pairs: Vec<_> = pest::state(input, |state| { |
| 1349 | /// state.atomic(Atomicity::Atomic, |s| { |
| 1350 | /// s.rule(Rule::a, |s| Ok(s)) |
| 1351 | /// }) |
| 1352 | /// }).unwrap().collect(); |
| 1353 | /// |
| 1354 | /// assert_eq!(pairs.len(), 0); |
| 1355 | /// ``` |
| 1356 | #[inline ] |
| 1357 | pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>> |
| 1358 | where |
| 1359 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
| 1360 | { |
| 1361 | self = self.inc_call_check_limit()?; |
| 1362 | // In case child parsing call is another `atomic` it will have its own atomicity status. |
| 1363 | let initial_atomicity = self.atomicity; |
| 1364 | // In case child atomicity is the same as we've demanded, we shouldn't do nothing. |
| 1365 | // E.g. we have the following rules: |
| 1366 | // * RootRule = @{ InnerRule } |
| 1367 | // * InnerRule = @{ ... } |
| 1368 | let should_toggle = self.atomicity != atomicity; |
| 1369 | |
| 1370 | // Note that we take atomicity of the top rule and not of the leaf (inner). |
| 1371 | if should_toggle { |
| 1372 | self.atomicity = atomicity; |
| 1373 | } |
| 1374 | |
| 1375 | let result = f(self); |
| 1376 | |
| 1377 | match result { |
| 1378 | Ok(mut new_state) => { |
| 1379 | if should_toggle { |
| 1380 | new_state.atomicity = initial_atomicity; |
| 1381 | } |
| 1382 | Ok(new_state) |
| 1383 | } |
| 1384 | Err(mut new_state) => { |
| 1385 | if should_toggle { |
| 1386 | new_state.atomicity = initial_atomicity; |
| 1387 | } |
| 1388 | Err(new_state) |
| 1389 | } |
| 1390 | } |
| 1391 | } |
| 1392 | |
| 1393 | /// Evaluates the result of closure `f` and pushes the span of the input consumed from before |
| 1394 | /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is |
| 1395 | /// called successfully, or `Err(Box<ParserState>)` otherwise. |
| 1396 | /// |
| 1397 | /// # Examples |
| 1398 | /// |
| 1399 | /// ``` |
| 1400 | /// # use pest; |
| 1401 | /// # #[allow (non_camel_case_types)] |
| 1402 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1403 | /// enum Rule {} |
| 1404 | /// |
| 1405 | /// let input = "ab" ; |
| 1406 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1407 | /// let mut result = state.stack_push(|state| state.match_string("a" )); |
| 1408 | /// assert!(result.is_ok()); |
| 1409 | /// assert_eq!(result.unwrap().position().pos(), 1); |
| 1410 | /// ``` |
| 1411 | #[inline ] |
| 1412 | pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
| 1413 | where |
| 1414 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
| 1415 | { |
| 1416 | self = self.inc_call_check_limit()?; |
| 1417 | let start = self.position; |
| 1418 | |
| 1419 | let result = f(self); |
| 1420 | |
| 1421 | match result { |
| 1422 | Ok(mut state) => { |
| 1423 | let end = state.position; |
| 1424 | state.stack.push(start.span(&end)); |
| 1425 | Ok(state) |
| 1426 | } |
| 1427 | Err(state) => Err(state), |
| 1428 | } |
| 1429 | } |
| 1430 | |
| 1431 | /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)` |
| 1432 | /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise. |
| 1433 | /// |
| 1434 | /// # Examples |
| 1435 | /// |
| 1436 | /// ``` |
| 1437 | /// # use pest; |
| 1438 | /// # #[allow (non_camel_case_types)] |
| 1439 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1440 | /// enum Rule {} |
| 1441 | /// |
| 1442 | /// let input = "aa" ; |
| 1443 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1444 | /// let mut result = state.stack_push(|state| state.match_string("a" )).and_then( |
| 1445 | /// |state| state.stack_peek() |
| 1446 | /// ); |
| 1447 | /// assert!(result.is_ok()); |
| 1448 | /// assert_eq!(result.unwrap().position().pos(), 2); |
| 1449 | /// ``` |
| 1450 | #[inline ] |
| 1451 | pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> { |
| 1452 | let string = self |
| 1453 | .stack |
| 1454 | .peek() |
| 1455 | .expect("peek was called on empty stack" ) |
| 1456 | .as_str(); |
| 1457 | self.match_string(string) |
| 1458 | } |
| 1459 | |
| 1460 | /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)` |
| 1461 | /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise. |
| 1462 | /// |
| 1463 | /// # Examples |
| 1464 | /// |
| 1465 | /// ``` |
| 1466 | /// # use pest; |
| 1467 | /// # #[allow (non_camel_case_types)] |
| 1468 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1469 | /// enum Rule {} |
| 1470 | /// |
| 1471 | /// let input = "aa" ; |
| 1472 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1473 | /// let mut result = state.stack_push(|state| state.match_string("a" )).and_then( |
| 1474 | /// |state| state.stack_pop() |
| 1475 | /// ); |
| 1476 | /// assert!(result.is_ok()); |
| 1477 | /// assert_eq!(result.unwrap().position().pos(), 2); |
| 1478 | /// ``` |
| 1479 | #[inline ] |
| 1480 | pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> { |
| 1481 | let string = self |
| 1482 | .stack |
| 1483 | .pop() |
| 1484 | .expect("pop was called on empty stack" ) |
| 1485 | .as_str(); |
| 1486 | self.match_string(string) |
| 1487 | } |
| 1488 | |
| 1489 | /// Matches part of the state of the stack. |
| 1490 | /// |
| 1491 | /// # Examples |
| 1492 | /// |
| 1493 | /// ``` |
| 1494 | /// # use pest::{self, MatchDir}; |
| 1495 | /// # #[allow (non_camel_case_types)] |
| 1496 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1497 | /// enum Rule {} |
| 1498 | /// |
| 1499 | /// let input = "abcd cd cb" ; |
| 1500 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1501 | /// let mut result = state |
| 1502 | /// .stack_push(|state| state.match_string("a" )) |
| 1503 | /// .and_then(|state| state.stack_push(|state| state.match_string("b" ))) |
| 1504 | /// .and_then(|state| state.stack_push(|state| state.match_string("c" ))) |
| 1505 | /// .and_then(|state| state.stack_push(|state| state.match_string("d" ))) |
| 1506 | /// .and_then(|state| state.match_string(" " )) |
| 1507 | /// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop)) |
| 1508 | /// .and_then(|state| state.match_string(" " )) |
| 1509 | /// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom)); |
| 1510 | /// assert!(result.is_ok()); |
| 1511 | /// assert_eq!(result.unwrap().position().pos(), 10); |
| 1512 | /// ``` |
| 1513 | #[inline ] |
| 1514 | pub fn stack_match_peek_slice( |
| 1515 | mut self: Box<Self>, |
| 1516 | start: i32, |
| 1517 | end: Option<i32>, |
| 1518 | match_dir: MatchDir, |
| 1519 | ) -> ParseResult<Box<Self>> { |
| 1520 | let range = match constrain_idxs(start, end, self.stack.len()) { |
| 1521 | Some(r) => r, |
| 1522 | None => return Err(self), |
| 1523 | }; |
| 1524 | // return true if an empty sequence is requested |
| 1525 | if range.end <= range.start { |
| 1526 | return Ok(self); |
| 1527 | } |
| 1528 | |
| 1529 | let mut position = self.position; |
| 1530 | let result = { |
| 1531 | let mut iter_b2t = self.stack[range].iter(); |
| 1532 | let matcher = |span: &Span<'_>| position.match_string(span.as_str()); |
| 1533 | match match_dir { |
| 1534 | MatchDir::BottomToTop => iter_b2t.all(matcher), |
| 1535 | MatchDir::TopToBottom => iter_b2t.rev().all(matcher), |
| 1536 | } |
| 1537 | }; |
| 1538 | if result { |
| 1539 | self.position = position; |
| 1540 | Ok(self) |
| 1541 | } else { |
| 1542 | Err(self) |
| 1543 | } |
| 1544 | } |
| 1545 | |
| 1546 | /// Matches the full state of the stack. |
| 1547 | /// |
| 1548 | /// # Examples |
| 1549 | /// |
| 1550 | /// ``` |
| 1551 | /// # use pest; |
| 1552 | /// # #[allow (non_camel_case_types)] |
| 1553 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1554 | /// enum Rule {} |
| 1555 | /// |
| 1556 | /// let input = "abba" ; |
| 1557 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1558 | /// let mut result = state |
| 1559 | /// .stack_push(|state| state.match_string("a" )) |
| 1560 | /// .and_then(|state| { state.stack_push(|state| state.match_string("b" )) }) |
| 1561 | /// .and_then(|state| state.stack_match_peek()); |
| 1562 | /// assert!(result.is_ok()); |
| 1563 | /// assert_eq!(result.unwrap().position().pos(), 4); |
| 1564 | /// ``` |
| 1565 | #[inline ] |
| 1566 | pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> { |
| 1567 | self.stack_match_peek_slice(0, None, MatchDir::TopToBottom) |
| 1568 | } |
| 1569 | |
| 1570 | /// Matches the full state of the stack. This method will clear the stack as it evaluates. |
| 1571 | /// |
| 1572 | /// # Examples |
| 1573 | /// |
| 1574 | /// ``` |
| 1575 | /// /// # use pest; |
| 1576 | /// # #[allow (non_camel_case_types)] |
| 1577 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1578 | /// enum Rule {} |
| 1579 | /// |
| 1580 | /// let input = "aaaa" ; |
| 1581 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1582 | /// let mut result = state.stack_push(|state| state.match_string("a" )).and_then(|state| { |
| 1583 | /// state.stack_push(|state| state.match_string("a" )) |
| 1584 | /// }).and_then(|state| state.stack_match_peek()); |
| 1585 | /// assert!(result.is_ok()); |
| 1586 | /// assert_eq!(result.unwrap().position().pos(), 4); |
| 1587 | /// ``` |
| 1588 | #[inline ] |
| 1589 | pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> { |
| 1590 | let mut position = self.position; |
| 1591 | let mut result = true; |
| 1592 | while let Some(span) = self.stack.pop() { |
| 1593 | result = position.match_string(span.as_str()); |
| 1594 | if !result { |
| 1595 | break; |
| 1596 | } |
| 1597 | } |
| 1598 | |
| 1599 | if result { |
| 1600 | self.position = position; |
| 1601 | Ok(self) |
| 1602 | } else { |
| 1603 | Err(self) |
| 1604 | } |
| 1605 | } |
| 1606 | |
| 1607 | /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or |
| 1608 | /// `Err(Box<ParserState>)` otherwise. |
| 1609 | /// |
| 1610 | /// # Examples |
| 1611 | /// |
| 1612 | /// ``` |
| 1613 | /// # use pest; |
| 1614 | /// # #[allow (non_camel_case_types)] |
| 1615 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1616 | /// enum Rule {} |
| 1617 | /// |
| 1618 | /// let input = "aa" ; |
| 1619 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1620 | /// let mut result = state.stack_push(|state| state.match_string("a" )).and_then( |
| 1621 | /// |state| state.stack_drop() |
| 1622 | /// ); |
| 1623 | /// assert!(result.is_ok()); |
| 1624 | /// assert_eq!(result.unwrap().position().pos(), 1); |
| 1625 | /// ``` |
| 1626 | #[inline ] |
| 1627 | pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> { |
| 1628 | match self.stack.pop() { |
| 1629 | Some(_) => Ok(self), |
| 1630 | None => Err(self), |
| 1631 | } |
| 1632 | } |
| 1633 | |
| 1634 | /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently, |
| 1635 | /// this method only restores the stack. |
| 1636 | /// |
| 1637 | /// # Examples |
| 1638 | /// |
| 1639 | /// ``` |
| 1640 | /// # use pest; |
| 1641 | /// # #[allow (non_camel_case_types)] |
| 1642 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 1643 | /// enum Rule {} |
| 1644 | /// |
| 1645 | /// let input = "ab" ; |
| 1646 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input); |
| 1647 | /// let mut result = state.restore_on_err(|state| state.stack_push(|state| |
| 1648 | /// state.match_string("a" )).and_then(|state| state.match_string("a" )) |
| 1649 | /// ); |
| 1650 | /// |
| 1651 | /// assert!(result.is_err()); |
| 1652 | /// |
| 1653 | /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed. |
| 1654 | /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop()); |
| 1655 | /// assert!(catch_panic.is_err()); |
| 1656 | /// ``` |
| 1657 | #[inline ] |
| 1658 | pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> |
| 1659 | where |
| 1660 | F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>, |
| 1661 | { |
| 1662 | match f(self.checkpoint()) { |
| 1663 | Ok(state) => Ok(state.checkpoint_ok()), |
| 1664 | Err(state) => Err(state.restore()), |
| 1665 | } |
| 1666 | } |
| 1667 | |
| 1668 | // Mark the current state as a checkpoint and return the `Box`. |
| 1669 | #[inline ] |
| 1670 | pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> { |
| 1671 | self.stack.snapshot(); |
| 1672 | self |
| 1673 | } |
| 1674 | |
| 1675 | // The checkpoint was cleared successfully |
| 1676 | // so remove it without touching other stack state. |
| 1677 | #[inline ] |
| 1678 | pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> { |
| 1679 | self.stack.clear_snapshot(); |
| 1680 | self |
| 1681 | } |
| 1682 | |
| 1683 | // Restore the current state to the most recent checkpoint. |
| 1684 | #[inline ] |
| 1685 | pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> { |
| 1686 | self.stack.restore(); |
| 1687 | self |
| 1688 | } |
| 1689 | } |
| 1690 | |
| 1691 | /// Helper function used only in case stack operations (PUSH/POP) are used in grammar. |
| 1692 | fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> { |
| 1693 | let start_norm: usize = normalize_index(i:start, len)?; |
| 1694 | let end_norm: usize = end.map_or(default:Some(len), |e: i32| normalize_index(i:e, len))?; |
| 1695 | Some(start_norm..end_norm) |
| 1696 | } |
| 1697 | |
| 1698 | /// `constrain_idxs` helper function. |
| 1699 | /// Normalizes the index using its sequence’s length. |
| 1700 | /// Returns `None` if the normalized index is OOB. |
| 1701 | fn normalize_index(i: i32, len: usize) -> Option<usize> { |
| 1702 | if i > len as i32 { |
| 1703 | None |
| 1704 | } else if i >= 0 { |
| 1705 | Some(i as usize) |
| 1706 | } else { |
| 1707 | let real_i: i32 = len as i32 + i; |
| 1708 | if real_i >= 0 { |
| 1709 | Some(real_i as usize) |
| 1710 | } else { |
| 1711 | None |
| 1712 | } |
| 1713 | } |
| 1714 | } |
| 1715 | |
| 1716 | #[cfg (test)] |
| 1717 | mod test { |
| 1718 | use super::*; |
| 1719 | |
| 1720 | #[test ] |
| 1721 | fn normalize_index_pos() { |
| 1722 | assert_eq!(normalize_index(4, 6), Some(4)); |
| 1723 | assert_eq!(normalize_index(5, 5), Some(5)); |
| 1724 | assert_eq!(normalize_index(6, 3), None); |
| 1725 | } |
| 1726 | |
| 1727 | #[test ] |
| 1728 | fn normalize_index_neg() { |
| 1729 | assert_eq!(normalize_index(-4, 6), Some(2)); |
| 1730 | assert_eq!(normalize_index(-5, 5), Some(0)); |
| 1731 | assert_eq!(normalize_index(-6, 3), None); |
| 1732 | } |
| 1733 | } |
| 1734 | |