1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use core::cmp::Ordering;
11use core::fmt;
12use core::hash::{Hash, Hasher};
13use core::ops::Range;
14use core::ptr;
15use core::str;
16
17use crate::span;
18
19/// A cursor position in a `&str` which provides useful methods to manually parse that string.
20#[derive(Clone, Copy)]
21pub struct Position<'i> {
22 input: &'i str,
23 pos: usize,
24}
25
26impl<'i> Position<'i> {
27 /// Create a new `Position` without checking invariants. (Checked with `debug_assertions`.)
28 pub(crate) fn new_internal(input: &str, pos: usize) -> Position<'_> {
29 debug_assert!(input.get(pos..).is_some());
30 Position { input, pos }
31 }
32
33 /// Attempts to create a new `Position` at the given position. If the specified position is
34 /// an invalid index, or the specified position is not a valid UTF8 boundary, then None is
35 /// returned.
36 ///
37 /// # Examples
38 /// ```
39 /// # use pest::Position;
40 /// let cheart = '💖';
41 /// let heart = "💖";
42 /// assert_eq!(Position::new(heart, 1), None);
43 /// assert_ne!(Position::new(heart, cheart.len_utf8()), None);
44 /// ```
45 pub fn new(input: &str, pos: usize) -> Option<Position<'_>> {
46 input.get(pos..).map(|_| Position { input, pos })
47 }
48
49 /// Creates a `Position` at the start of a `&str`.
50 ///
51 /// # Examples
52 ///
53 /// ```
54 /// # use pest::Position;
55 /// let start = Position::from_start("");
56 /// assert_eq!(start.pos(), 0);
57 /// ```
58 #[inline]
59 pub fn from_start(input: &'i str) -> Position<'i> {
60 // Position 0 is always safe because it's always a valid UTF-8 border.
61 Position { input, pos: 0 }
62 }
63
64 /// Returns the byte position of this `Position` as a `usize`.
65 ///
66 /// # Examples
67 ///
68 /// ```
69 /// # use pest::Position;
70 /// let input = "ab";
71 /// let mut start = Position::from_start(input);
72 ///
73 /// assert_eq!(start.pos(), 0);
74 /// ```
75 #[inline]
76 pub fn pos(&self) -> usize {
77 self.pos
78 }
79
80 /// Creates a `Span` from two `Position`s.
81 ///
82 /// # Panics
83 ///
84 /// Panics if the positions come from different inputs.
85 ///
86 /// # Examples
87 ///
88 /// ```
89 /// # use pest::Position;
90 /// let input = "ab";
91 /// let start = Position::from_start(input);
92 /// let span = start.span(&start.clone());
93 ///
94 /// assert_eq!(span.start(), 0);
95 /// assert_eq!(span.end(), 0);
96 /// ```
97 #[inline]
98 pub fn span(&self, other: &Position<'i>) -> span::Span<'i> {
99 if ptr::eq(self.input, other.input)
100 /* && self.input.get(self.pos..other.pos).is_some() */
101 {
102 span::Span::new_internal(self.input, self.pos, other.pos)
103 } else {
104 // TODO: maybe a panic if self.pos < other.pos
105 panic!("span created from positions from different inputs")
106 }
107 }
108
109 /// Returns the line and column number of this `Position`.
110 ///
111 /// This is an O(n) operation, where n is the number of chars in the input.
112 /// You better use [`pair.line_col()`](struct.Pair.html#method.line_col) instead.
113 ///
114 /// # Examples
115 ///
116 /// ```
117 /// # use pest;
118 /// # #[allow(non_camel_case_types)]
119 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
120 /// enum Rule {}
121 ///
122 /// let input = "\na";
123 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
124 /// let mut result = state.match_string("\na");
125 /// assert!(result.is_ok());
126 /// assert_eq!(result.unwrap().position().line_col(), (2, 2));
127 /// ```
128 #[inline]
129 pub fn line_col(&self) -> (usize, usize) {
130 if self.pos > self.input.len() {
131 panic!("position out of bounds");
132 }
133 let mut pos = self.pos;
134 let slice = &self.input[..pos];
135 let mut chars = slice.chars().peekable();
136
137 let mut line_col = (1, 1);
138
139 while pos != 0 {
140 match chars.next() {
141 Some('\r') => {
142 if let Some(&'\n') = chars.peek() {
143 chars.next();
144
145 if pos == 1 {
146 pos -= 1;
147 } else {
148 pos -= 2;
149 }
150
151 line_col = (line_col.0 + 1, 1);
152 } else {
153 pos -= 1;
154 line_col = (line_col.0, line_col.1 + 1);
155 }
156 }
157 Some('\n') => {
158 pos -= 1;
159 line_col = (line_col.0 + 1, 1);
160 }
161 Some(c) => {
162 pos -= c.len_utf8();
163 line_col = (line_col.0, line_col.1 + 1);
164 }
165 None => unreachable!(),
166 }
167 }
168
169 line_col
170 }
171
172 /// Returns the entire line of the input that contains this `Position`.
173 ///
174 /// # Examples
175 ///
176 /// ```
177 /// # use pest;
178 /// # #[allow(non_camel_case_types)]
179 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
180 /// enum Rule {}
181 ///
182 /// let input = "\na";
183 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
184 /// let mut result = state.match_string("\na");
185 /// assert!(result.is_ok());
186 /// assert_eq!(result.unwrap().position().line_of(), "a");
187 /// ```
188 #[inline]
189 pub fn line_of(&self) -> &'i str {
190 if self.pos > self.input.len() {
191 panic!("position out of bounds");
192 };
193 // Safe since start and end can only be valid UTF-8 borders.
194 &self.input[self.find_line_start()..self.find_line_end()]
195 }
196
197 pub(crate) fn find_line_start(&self) -> usize {
198 if self.input.is_empty() {
199 return 0;
200 };
201 // Position's pos is always a UTF-8 border.
202 let start = self
203 .input
204 .char_indices()
205 .rev()
206 .skip_while(|&(i, _)| i >= self.pos)
207 .find(|&(_, c)| c == '\n');
208 match start {
209 Some((i, _)) => i + 1,
210 None => 0,
211 }
212 }
213
214 pub(crate) fn find_line_end(&self) -> usize {
215 if self.input.is_empty() {
216 0
217 } else if self.pos == self.input.len() - 1 {
218 self.input.len()
219 } else {
220 // Position's pos is always a UTF-8 border.
221 let end = self
222 .input
223 .char_indices()
224 .skip_while(|&(i, _)| i < self.pos)
225 .find(|&(_, c)| c == '\n');
226 match end {
227 Some((i, _)) => i + 1,
228 None => self.input.len(),
229 }
230 }
231 }
232
233 /// Returns `true` when the `Position` points to the start of the input `&str`.
234 #[inline]
235 pub(crate) fn at_start(&self) -> bool {
236 self.pos == 0
237 }
238
239 /// Returns `true` when the `Position` points to the end of the input `&str`.
240 #[inline]
241 pub(crate) fn at_end(&self) -> bool {
242 self.pos == self.input.len()
243 }
244
245 /// Skips `n` `char`s from the `Position` and returns `true` if the skip was possible or `false`
246 /// otherwise. If the return value is `false`, `pos` will not be updated.
247 #[inline]
248 pub(crate) fn skip(&mut self, n: usize) -> bool {
249 let skipped = {
250 let mut len = 0;
251 // Position's pos is always a UTF-8 border.
252 let mut chars = self.input[self.pos..].chars();
253 for _ in 0..n {
254 if let Some(c) = chars.next() {
255 len += c.len_utf8();
256 } else {
257 return false;
258 }
259 }
260 len
261 };
262
263 self.pos += skipped;
264 true
265 }
266
267 /// Goes back `n` `char`s from the `Position` and returns `true` if the skip was possible or `false`
268 /// otherwise. If the return value is `false`, `pos` will not be updated.
269 #[inline]
270 pub(crate) fn skip_back(&mut self, n: usize) -> bool {
271 let skipped = {
272 let mut len = 0;
273 // Position's pos is always a UTF-8 border.
274 let mut chars = self.input[..self.pos].chars().rev();
275 for _ in 0..n {
276 if let Some(c) = chars.next() {
277 len += c.len_utf8();
278 } else {
279 return false;
280 }
281 }
282 len
283 };
284
285 self.pos -= skipped;
286 true
287 }
288
289 /// Skips until one of the given `strings` is found. If none of the `strings` can be found,
290 /// this function will return `false` but its `pos` will *still* be updated.
291 #[inline]
292 pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool {
293 #[cfg(not(feature = "memchr"))]
294 {
295 self.skip_until_basic(strings)
296 }
297 #[cfg(feature = "memchr")]
298 {
299 match strings {
300 [] => (),
301 [s1] => {
302 if let Some(from) =
303 memchr::memmem::find(&self.input.as_bytes()[self.pos..], s1.as_bytes())
304 {
305 self.pos += from;
306 return true;
307 }
308 }
309 [s1, s2] if !s1.is_empty() && !s2.is_empty() => {
310 let b1 = s1.as_bytes()[0];
311 let b2 = s2.as_bytes()[0];
312 let miter = memchr::memchr2_iter(b1, b2, &self.input.as_bytes()[self.pos..]);
313 for from in miter {
314 let start = &self.input[self.pos + from..];
315 if start.starts_with(s1) || start.starts_with(s2) {
316 self.pos += from;
317 return true;
318 }
319 }
320 }
321 [s1, s2, s3] if !s1.is_empty() && !s2.is_empty() && s3.is_empty() => {
322 let b1 = s1.as_bytes()[0];
323 let b2 = s2.as_bytes()[0];
324 let b3 = s2.as_bytes()[0];
325 let miter =
326 memchr::memchr3_iter(b1, b2, b3, &self.input.as_bytes()[self.pos..]);
327 for from in miter {
328 let start = &self.input[self.pos + from..];
329 if start.starts_with(s1) || start.starts_with(s2) || start.starts_with(s3) {
330 self.pos += from;
331 return true;
332 }
333 }
334 }
335 _ => {
336 return self.skip_until_basic(strings);
337 }
338 }
339 self.pos = self.input.len();
340 false
341 }
342 }
343
344 #[inline]
345 fn skip_until_basic(&mut self, strings: &[&str]) -> bool {
346 // TODO: optimize with Aho-Corasick, e.g. https://crates.io/crates/daachorse?
347 for from in self.pos..self.input.len() {
348 let bytes = if let Some(string) = self.input.get(from..) {
349 string.as_bytes()
350 } else {
351 continue;
352 };
353
354 for slice in strings.iter() {
355 let to = slice.len();
356 if Some(slice.as_bytes()) == bytes.get(0..to) {
357 self.pos = from;
358 return true;
359 }
360 }
361 }
362
363 self.pos = self.input.len();
364 false
365 }
366
367 /// Matches the char at the `Position` against a specified character and returns `true` if a match
368 /// was made. If no match was made, returns `false`.
369 /// `pos` will not be updated in either case.
370 #[inline]
371 pub(crate) fn match_char(&self, c: char) -> bool {
372 matches!(self.input[self.pos..].chars().next(), Some(cc) if c == cc)
373 }
374
375 /// Matches the char at the `Position` against a filter function and returns `true` if a match
376 /// was made. If no match was made, returns `false` and `pos` will not be updated.
377 #[inline]
378 pub(crate) fn match_char_by<F>(&mut self, f: F) -> bool
379 where
380 F: FnOnce(char) -> bool,
381 {
382 if let Some(c) = self.input[self.pos..].chars().next() {
383 if f(c) {
384 self.pos += c.len_utf8();
385 true
386 } else {
387 false
388 }
389 } else {
390 false
391 }
392 }
393
394 /// Matches `string` from the `Position` and returns `true` if a match was made or `false`
395 /// otherwise. If no match was made, `pos` will not be updated.
396 #[inline]
397 pub(crate) fn match_string(&mut self, string: &str) -> bool {
398 let to = self.pos + string.len();
399
400 if Some(string.as_bytes()) == self.input.as_bytes().get(self.pos..to) {
401 self.pos = to;
402 true
403 } else {
404 false
405 }
406 }
407
408 /// Case-insensitively matches `string` from the `Position` and returns `true` if a match was
409 /// made or `false` otherwise. If no match was made, `pos` will not be updated.
410 #[inline]
411 pub(crate) fn match_insensitive(&mut self, string: &str) -> bool {
412 let matched = {
413 let slice = &self.input[self.pos..];
414 if let Some(slice) = slice.get(0..string.len()) {
415 slice.eq_ignore_ascii_case(string)
416 } else {
417 false
418 }
419 };
420
421 if matched {
422 self.pos += string.len();
423 true
424 } else {
425 false
426 }
427 }
428
429 /// Matches `char` `range` from the `Position` and returns `true` if a match was made or `false`
430 /// otherwise. If no match was made, `pos` will not be updated.
431 #[inline]
432 pub(crate) fn match_range(&mut self, range: Range<char>) -> bool {
433 if let Some(c) = self.input[self.pos..].chars().next() {
434 if range.start <= c && c <= range.end {
435 self.pos += c.len_utf8();
436 return true;
437 }
438 }
439
440 false
441 }
442}
443
444impl<'i> fmt::Debug for Position<'i> {
445 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
446 f.debug_struct("Position").field(name:"pos", &self.pos).finish()
447 }
448}
449
450impl<'i> PartialEq for Position<'i> {
451 fn eq(&self, other: &Position<'i>) -> bool {
452 ptr::eq(self.input, b:other.input) && self.pos == other.pos
453 }
454}
455
456impl<'i> Eq for Position<'i> {}
457
458#[allow(clippy::non_canonical_partial_ord_impl)]
459impl<'i> PartialOrd for Position<'i> {
460 fn partial_cmp(&self, other: &Position<'i>) -> Option<Ordering> {
461 if ptr::eq(self.input, b:other.input) {
462 self.pos.partial_cmp(&other.pos)
463 } else {
464 None
465 }
466 }
467}
468
469impl<'i> Ord for Position<'i> {
470 fn cmp(&self, other: &Position<'i>) -> Ordering {
471 self.partial_cmp(other)
472 .expect(msg:"cannot compare positions from different strs")
473 }
474}
475
476impl<'i> Hash for Position<'i> {
477 fn hash<H: Hasher>(&self, state: &mut H) {
478 (self.input as *const str).hash(state);
479 self.pos.hash(state);
480 }
481}
482
483#[cfg(test)]
484mod tests {
485 use super::*;
486
487 #[test]
488 fn empty() {
489 let input = "";
490 assert!(Position::new(input, 0).unwrap().match_string(""));
491 assert!(!Position::new(input, 0).unwrap().match_string("a"));
492 }
493
494 #[test]
495 fn parts() {
496 let input = "asdasdf";
497
498 assert!(Position::new(input, 0).unwrap().match_string("asd"));
499 assert!(Position::new(input, 3).unwrap().match_string("asdf"));
500 }
501
502 #[test]
503 fn line_col() {
504 let input = "a\rb\nc\r\nd嗨";
505
506 assert_eq!(Position::new(input, 0).unwrap().line_col(), (1, 1));
507 assert_eq!(Position::new(input, 1).unwrap().line_col(), (1, 2));
508 assert_eq!(Position::new(input, 2).unwrap().line_col(), (1, 3));
509 assert_eq!(Position::new(input, 3).unwrap().line_col(), (1, 4));
510 assert_eq!(Position::new(input, 4).unwrap().line_col(), (2, 1));
511 assert_eq!(Position::new(input, 5).unwrap().line_col(), (2, 2));
512 assert_eq!(Position::new(input, 6).unwrap().line_col(), (2, 3));
513 assert_eq!(Position::new(input, 7).unwrap().line_col(), (3, 1));
514 assert_eq!(Position::new(input, 8).unwrap().line_col(), (3, 2));
515 assert_eq!(Position::new(input, 11).unwrap().line_col(), (3, 3));
516 let input = "abcd嗨";
517 assert_eq!(Position::new(input, 7).unwrap().line_col(), (1, 6));
518 }
519
520 #[test]
521 fn line_of() {
522 let input = "a\rb\nc\r\nd嗨";
523
524 assert_eq!(Position::new(input, 0).unwrap().line_of(), "a\rb\n");
525 assert_eq!(Position::new(input, 1).unwrap().line_of(), "a\rb\n");
526 assert_eq!(Position::new(input, 2).unwrap().line_of(), "a\rb\n");
527 assert_eq!(Position::new(input, 3).unwrap().line_of(), "a\rb\n");
528 assert_eq!(Position::new(input, 4).unwrap().line_of(), "c\r\n");
529 assert_eq!(Position::new(input, 5).unwrap().line_of(), "c\r\n");
530 assert_eq!(Position::new(input, 6).unwrap().line_of(), "c\r\n");
531 assert_eq!(Position::new(input, 7).unwrap().line_of(), "d嗨");
532 assert_eq!(Position::new(input, 8).unwrap().line_of(), "d嗨");
533 assert_eq!(Position::new(input, 11).unwrap().line_of(), "d嗨");
534 }
535
536 #[test]
537 fn line_of_empty() {
538 let input = "";
539
540 assert_eq!(Position::new(input, 0).unwrap().line_of(), "");
541 }
542
543 #[test]
544 fn line_of_new_line() {
545 let input = "\n";
546
547 assert_eq!(Position::new(input, 0).unwrap().line_of(), "\n");
548 }
549
550 #[test]
551 fn line_of_between_new_line() {
552 let input = "\n\n";
553
554 assert_eq!(Position::new(input, 1).unwrap().line_of(), "\n");
555 }
556
557 fn measure_skip(input: &str, pos: usize, n: usize) -> Option<usize> {
558 let mut p = Position::new(input, pos).unwrap();
559 if p.skip(n) {
560 Some(p.pos - pos)
561 } else {
562 None
563 }
564 }
565
566 #[test]
567 fn skip_empty() {
568 let input = "";
569
570 assert_eq!(measure_skip(input, 0, 0), Some(0));
571 assert_eq!(measure_skip(input, 0, 1), None);
572 }
573
574 #[test]
575 fn skip() {
576 let input = "d嗨";
577
578 assert_eq!(measure_skip(input, 0, 0), Some(0));
579 assert_eq!(measure_skip(input, 0, 1), Some(1));
580 assert_eq!(measure_skip(input, 1, 1), Some(3));
581 }
582
583 #[test]
584 fn skip_until() {
585 let input = "ab ac";
586 let pos = Position::from_start(input);
587
588 let mut test_pos = pos;
589 test_pos.skip_until(&["a", "b"]);
590 assert_eq!(test_pos.pos(), 0);
591
592 test_pos = pos;
593 test_pos.skip_until(&["b"]);
594 assert_eq!(test_pos.pos(), 1);
595
596 test_pos = pos;
597 test_pos.skip_until(&["ab"]);
598 assert_eq!(test_pos.pos(), 0);
599
600 test_pos = pos;
601 test_pos.skip_until(&["ac", "z"]);
602 assert_eq!(test_pos.pos(), 3);
603
604 test_pos = pos;
605 assert!(!test_pos.skip_until(&["z"]));
606 assert_eq!(test_pos.pos(), 5);
607 }
608
609 #[test]
610 fn match_range() {
611 let input = "b";
612
613 assert!(Position::new(input, 0).unwrap().match_range('a'..'c'));
614 assert!(Position::new(input, 0).unwrap().match_range('b'..'b'));
615 assert!(!Position::new(input, 0).unwrap().match_range('a'..'a'));
616 assert!(!Position::new(input, 0).unwrap().match_range('c'..'c'));
617 assert!(Position::new(input, 0).unwrap().match_range('a'..'嗨'));
618 }
619
620 #[test]
621 fn match_insensitive() {
622 let input = "AsdASdF";
623
624 assert!(Position::new(input, 0).unwrap().match_insensitive("asd"));
625 assert!(Position::new(input, 3).unwrap().match_insensitive("asdf"));
626 }
627
628 #[test]
629 fn cmp() {
630 let input = "a";
631 let start = Position::from_start(input);
632 let mut end = start;
633
634 assert!(end.skip(1));
635 let result = start.cmp(&end);
636
637 assert_eq!(result, Ordering::Less);
638 }
639
640 #[test]
641 #[should_panic]
642 fn cmp_panic() {
643 let input1 = "a";
644 let input2 = "b";
645 let pos1 = Position::from_start(input1);
646 let pos2 = Position::from_start(input2);
647
648 let _ = pos1.cmp(&pos2);
649 }
650
651 #[test]
652 #[cfg(feature = "std")]
653 fn hash() {
654 use std::collections::HashSet;
655
656 let input = "a";
657 let start = Position::from_start(input);
658 let mut positions = HashSet::new();
659
660 positions.insert(start);
661 }
662}
663