position.rs source code [crates/pest/src/position.rs]

1	// pest. The Elegant Parser
2	// Copyright (c) 2018 Dragoș Tiselice
3	//
4	// Licensed under the Apache License, Version 2.0
5	// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6	// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7	// option. All files in the project carrying such notice may not be copied,
8	// modified, or distributed except according to those terms.
9
10	use core::cmp::Ordering;
11	use core::fmt;
12	use core::hash::{Hash, Hasher};
13	use core::ops::Range;
14	use core::ptr;
15	use core::str;
16
17	use crate::span;
18
19	/// A cursor position in a `&str` which provides useful methods to manually parse that string.
20	#[derive(Clone, Copy)]
21	pub struct Position<'i> {
22	input: &'i str,
23	pos: usize,
24	}
25
26	impl<'i> Position<'i> {
27	/// Create a new `Position` without checking invariants. (Checked with `debug_assertions`.)
28	pub(crate) fn new_internal(input: &str, pos: usize) -> Position<'_> {
29	debug_assert!(input.get(pos..).is_some());
30	Position { input, pos }
31	}
32
33	/// Attempts to create a new `Position` at the given position. If the specified position is
34	/// an invalid index, or the specified position is not a valid UTF8 boundary, then None is
35	/// returned.
36	///
37	/// # Examples
38	/// ```
39	/// # use pest::Position;
40	/// let cheart = '💖';
41	/// let heart = "💖";
42	/// assert_eq!(Position::new(heart, `1`), None);
43	/// assert_ne!(Position::new(heart, cheart.len_utf8()), None);
44	/// ```
45	pub fn new(input: &str, pos: usize) -> Option<Position<'_>> {
46	input.get(pos..).map(\|_\| Position { input, pos })
47	}
48
49	/// Creates a `Position` at the start of a `&str`.
50	///
51	/// # Examples
52	///
53	/// ```
54	/// # use pest::Position;
55	/// let start = Position::from_start("");
56	/// assert_eq!(start.pos(), `0`);
57	/// ```
58	#[inline]
59	pub fn from_start(input: &'i str) -> Position<'i> {
60	// Position 0 is always safe because it's always a valid UTF-8 border.
61	Position { input, pos: `0` }
62	}
63
64	/// Returns the byte position of this `Position` as a `usize`.
65	///
66	/// # Examples
67	///
68	/// ```
69	/// # use pest::Position;
70	/// let input = "ab";
71	/// let mut start = Position::from_start(input);
72	///
73	/// assert_eq!(start.pos(), `0`);
74	/// ```
75	#[inline]
76	pub fn pos(&self) -> usize {
77	self.pos
78	}
79
80	/// Creates a `Span` from two `Position`s.
81	///
82	/// # Panics
83	///
84	/// Panics if the positions come from different inputs.
85	///
86	/// # Examples
87	///
88	/// ```
89	/// # use pest::Position;
90	/// let input = "ab";
91	/// let start = Position::from_start(input);
92	/// let span = start.span(&start.clone());
93	///
94	/// assert_eq!(span.start(), `0`);
95	/// assert_eq!(span.end(), `0`);
96	/// ```
97	#[inline]
98	pub fn span(&self, other: &Position<'i>) -> span::Span<'i> {
99	if ptr::eq(self.input, other.input)
100	/ && self.input.get(self.pos..other.pos).is_some() /
101	{
102	span::Span::new_internal(self.input, self.pos, other.pos)
103	} else {
104	// TODO: maybe a panic if self.pos < other.pos
105	panic!("span created from positions from different inputs")
106	}
107	}
108
109	/// Returns the line and column number of this `Position`.
110	///
111	/// This is an O(n) operation, where n is the number of chars in the input.
112	/// You better use [`pair.line_col()`](struct.Pair.html#method.line_col) instead.
113	///
114	/// # Examples
115	///
116	/// ```
117	/// # use pest;
118	/// # #[allow(non_camel_case_types)]
119	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
120	/// enum Rule {}
121	///
122	/// let input = "`\n`a";
123	/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
124	/// let mut result = state.match_string("`\n`a");
125	/// assert!(result.is_ok());
126	/// assert_eq!(result.unwrap().position().line_col(), (`2`, `2`));
127	/// ```
128	#[inline]
129	pub fn line_col(&self) -> (usize, usize) {
130	if self.pos > self.input.len() {
131	panic!("position out of bounds");
132	}
133	let mut pos = self.pos;
134	let slice = &self.input[..pos];
135	let mut chars = slice.chars().peekable();
136
137	let mut line_col = (`1`, `1`);
138
139	while pos != `0` {
140	match chars.next() {
141	Some('`\r`') => {
142	if let Some(&'`\n`') = chars.peek() {
143	chars.next();
144
145	if pos == `1` {
146	pos -= `1`;
147	} else {
148	pos -= `2`;
149	}
150
151	line_col = (line_col.0 + `1`, `1`);
152	} else {
153	pos -= `1`;
154	line_col = (line_col.0, line_col.1 + `1`);
155	}
156	}
157	Some('`\n`') => {
158	pos -= `1`;
159	line_col = (line_col.0 + `1`, `1`);
160	}
161	Some(c) => {
162	pos -= c.len_utf8();
163	line_col = (line_col.0, line_col.1 + `1`);
164	}
165	None => unreachable!(),
166	}
167	}
168
169	line_col
170	}
171
172	/// Returns the entire line of the input that contains this `Position`.
173	///
174	/// # Examples
175	///
176	/// ```
177	/// # use pest;
178	/// # #[allow(non_camel_case_types)]
179	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
180	/// enum Rule {}
181	///
182	/// let input = "`\n`a";
183	/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
184	/// let mut result = state.match_string("`\n`a");
185	/// assert!(result.is_ok());
186	/// assert_eq!(result.unwrap().position().line_of(), "a");
187	/// ```
188	#[inline]
189	pub fn line_of(&self) -> &'i str {
190	if self.pos > self.input.len() {
191	panic!("position out of bounds");
192	};
193	// Safe since start and end can only be valid UTF-8 borders.
194	&self.input[self.find_line_start()..self.find_line_end()]
195	}
196
197	pub(crate) fn find_line_start(&self) -> usize {
198	if self.input.is_empty() {
199	return `0`;
200	};
201	// Position's pos is always a UTF-8 border.
202	let start = self
203	.input
204	.char_indices()
205	.rev()
206	.skip_while(\|&(i, _)\| i >= self.pos)
207	.find(\|&(_, c)\| c == '`\n`');
208	match start {
209	Some((i, _)) => i + `1`,
210	None => `0`,
211	}
212	}
213
214	pub(crate) fn find_line_end(&self) -> usize {
215	if self.input.is_empty() {
216	`0`
217	} else if self.pos == self.input.len() - `1` {
218	self.input.len()
219	} else {
220	// Position's pos is always a UTF-8 border.
221	let end = self
222	.input
223	.char_indices()
224	.skip_while(\|&(i, _)\| i < self.pos)
225	.find(\|&(_, c)\| c == '`\n`');
226	match end {
227	Some((i, _)) => i + `1`,
228	None => self.input.len(),
229	}
230	}
231	}
232
233	/// Returns `true` when the `Position` points to the start of the input `&str`.
234	#[inline]
235	pub(crate) fn at_start(&self) -> bool {
236	self.pos == `0`
237	}
238
239	/// Returns `true` when the `Position` points to the end of the input `&str`.
240	#[inline]
241	pub(crate) fn at_end(&self) -> bool {
242	self.pos == self.input.len()
243	}
244
245	/// Skips `n` `char`s from the `Position` and returns `true` if the skip was possible or `false`
246	/// otherwise. If the return value is `false`, `pos` will not be updated.
247	#[inline]
248	pub(crate) fn skip(&mut self, n: usize) -> bool {
249	let skipped = {
250	let mut len = `0`;
251	// Position's pos is always a UTF-8 border.
252	let mut chars = self.input[self.pos..].chars();
253	for _ in `0`..n {
254	if let Some(c) = chars.next() {
255	len += c.len_utf8();
256	} else {
257	return `false`;
258	}
259	}
260	len
261	};
262
263	self.pos += skipped;
264	`true`
265	}
266
267	/// Goes back `n` `char`s from the `Position` and returns `true` if the skip was possible or `false`
268	/// otherwise. If the return value is `false`, `pos` will not be updated.
269	#[inline]
270	pub(crate) fn skip_back(&mut self, n: usize) -> bool {
271	let skipped = {
272	let mut len = `0`;
273	// Position's pos is always a UTF-8 border.
274	let mut chars = self.input[..self.pos].chars().rev();
275	for _ in `0`..n {
276	if let Some(c) = chars.next() {
277	len += c.len_utf8();
278	} else {
279	return `false`;
280	}
281	}
282	len
283	};
284
285	self.pos -= skipped;
286	`true`
287	}
288
289	/// Skips until one of the given `strings` is found. If none of the `strings` can be found,
290	/// this function will return `false` but its `pos` will still* be updated.*
291	#[inline]
292	pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool {
293	#[cfg(not(feature = "memchr"))]
294	{
295	self.skip_until_basic(strings)
296	}
297	#[cfg(feature = "memchr")]
298	{
299	match strings {
300	[] => (),
301	[s1] => {
302	if let Some(from) =
303	memchr::memmem::find(&self.input.as_bytes()[self.pos..], s1.as_bytes())
304	{
305	self.pos += from;
306	return `true`;
307	}
308	}
309	[s1, s2] if !s1.is_empty() && !s2.is_empty() => {
310	let b1 = s1.as_bytes()[`0`];
311	let b2 = s2.as_bytes()[`0`];
312	let miter = memchr::memchr2_iter(b1, b2, &self.input.as_bytes()[self.pos..]);
313	for from in miter {
314	let start = &self.input[self.pos + from..];
315	if start.starts_with(s1) \|\| start.starts_with(s2) {
316	self.pos += from;
317	return `true`;
318	}
319	}
320	}
321	[s1, s2, s3] if !s1.is_empty() && !s2.is_empty() && s3.is_empty() => {
322	let b1 = s1.as_bytes()[`0`];
323	let b2 = s2.as_bytes()[`0`];
324	let b3 = s2.as_bytes()[`0`];
325	let miter =
326	memchr::memchr3_iter(b1, b2, b3, &self.input.as_bytes()[self.pos..]);
327	for from in miter {
328	let start = &self.input[self.pos + from..];
329	if start.starts_with(s1) \|\| start.starts_with(s2) \|\| start.starts_with(s3) {
330	self.pos += from;
331	return `true`;
332	}
333	}
334	}
335	_ => {
336	return self.skip_until_basic(strings);
337	}
338	}
339	self.pos = self.input.len();
340	`false`
341	}
342	}
343
344	#[inline]
345	fn skip_until_basic(&mut self, strings: &[&str]) -> bool {
346	// TODO: optimize with Aho-Corasick, e.g. https://crates.io/crates/daachorse?
347	for from in self.pos..self.input.len() {
348	let bytes = if let Some(string) = self.input.get(from..) {
349	string.as_bytes()
350	} else {
351	continue;
352	};
353
354	for slice in strings.iter() {
355	let to = slice.len();
356	if Some(slice.as_bytes()) == bytes.get(`0`..to) {
357	self.pos = from;
358	return `true`;
359	}
360	}
361	}
362
363	self.pos = self.input.len();
364	`false`
365	}
366
367	/// Matches the char at the `Position` against a specified character and returns `true` if a match
368	/// was made. If no match was made, returns `false`.
369	/// `pos` will not be updated in either case.
370	#[inline]
371	pub(crate) fn match_char(&self, c: char) -> bool {
372	matches!(self.input[self.pos..].chars().next(), Some(cc) if c == cc)
373	}
374
375	/// Matches the char at the `Position` against a filter function and returns `true` if a match
376	/// was made. If no match was made, returns `false` and `pos` will not be updated.
377	#[inline]
378	pub(crate) fn match_char_by<F>(&mut self, f: F) -> bool
379	where
380	F: FnOnce(char) -> bool,
381	{
382	if let Some(c) = self.input[self.pos..].chars().next() {
383	if f(c) {
384	self.pos += c.len_utf8();
385	`true`
386	} else {
387	`false`
388	}
389	} else {
390	`false`
391	}
392	}
393
394	/// Matches `string` from the `Position` and returns `true` if a match was made or `false`
395	/// otherwise. If no match was made, `pos` will not be updated.
396	#[inline]
397	pub(crate) fn match_string(&mut self, string: &str) -> bool {
398	let to = self.pos + string.len();
399
400	if Some(string.as_bytes()) == self.input.as_bytes().get(self.pos..to) {
401	self.pos = to;
402	`true`
403	} else {
404	`false`
405	}
406	}
407
408	/// Case-insensitively matches `string` from the `Position` and returns `true` if a match was
409	/// made or `false` otherwise. If no match was made, `pos` will not be updated.
410	#[inline]
411	pub(crate) fn match_insensitive(&mut self, string: &str) -> bool {
412	let matched = {
413	let slice = &self.input[self.pos..];
414	if let Some(slice) = slice.get(`0`..string.len()) {
415	slice.eq_ignore_ascii_case(string)
416	} else {
417	`false`
418	}
419	};
420
421	if matched {
422	self.pos += string.len();
423	`true`
424	} else {
425	`false`
426	}
427	}
428
429	/// Matches `char` `range` from the `Position` and returns `true` if a match was made or `false`
430	/// otherwise. If no match was made, `pos` will not be updated.
431	#[inline]
432	pub(crate) fn match_range(&mut self, range: Range<char>) -> bool {
433	if let Some(c) = self.input[self.pos..].chars().next() {
434	if range.start <= c && c <= range.end {
435	self.pos += c.len_utf8();
436	return `true`;
437	}
438	}
439
440	`false`
441	}
442	}
443
444	impl<'i> fmt::Debug for Position<'i> {
445	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
446	f.debug_struct("Position").field(name:"pos", &self.pos).finish()
447	}
448	}
449
450	impl<'i> PartialEq for Position<'i> {
451	fn eq(&self, other: &Position<'i>) -> bool {
452	ptr::eq(self.input, b:other.input) && self.pos == other.pos
453	}
454	}
455
456	impl<'i> Eq for Position<'i> {}
457
458	#[allow(clippy::non_canonical_partial_ord_impl)]
459	impl<'i> PartialOrd for Position<'i> {
460	fn partial_cmp(&self, other: &Position<'i>) -> Option<Ordering> {
461	if ptr::eq(self.input, b:other.input) {
462	self.pos.partial_cmp(&other.pos)
463	} else {
464	None
465	}
466	}
467	}
468
469	impl<'i> Ord for Position<'i> {
470	fn cmp(&self, other: &Position<'i>) -> Ordering {
471	self.partial_cmp(other)
472	.expect(msg:"cannot compare positions from different strs")
473	}
474	}
475
476	impl<'i> Hash for Position<'i> {
477	fn hash<H: Hasher>(&self, state: &mut H) {
478	(self.input as *const str).hash(state);
479	self.pos.hash(state);
480	}
481	}
482
483	#[cfg(test)]
484	mod tests {
485	use super::*;
486
487	#[test]
488	fn empty() {
489	let input = "";
490	assert!(Position::new(input, `0`).unwrap().match_string(""));
491	assert!(!Position::new(input, `0`).unwrap().match_string("a"));
492	}
493
494	#[test]
495	fn parts() {
496	let input = "asdasdf";
497
498	assert!(Position::new(input, `0`).unwrap().match_string("asd"));
499	assert!(Position::new(input, `3`).unwrap().match_string("asdf"));
500	}
501
502	#[test]
503	fn line_col() {
504	let input = "a`\r`b`\n`c`\r\n`d嗨";
505
506	assert_eq!(Position::new(input, `0`).unwrap().line_col(), (`1`, `1`));
507	assert_eq!(Position::new(input, `1`).unwrap().line_col(), (`1`, `2`));
508	assert_eq!(Position::new(input, `2`).unwrap().line_col(), (`1`, `3`));
509	assert_eq!(Position::new(input, `3`).unwrap().line_col(), (`1`, `4`));
510	assert_eq!(Position::new(input, `4`).unwrap().line_col(), (`2`, `1`));
511	assert_eq!(Position::new(input, `5`).unwrap().line_col(), (`2`, `2`));
512	assert_eq!(Position::new(input, `6`).unwrap().line_col(), (`2`, `3`));
513	assert_eq!(Position::new(input, `7`).unwrap().line_col(), (`3`, `1`));
514	assert_eq!(Position::new(input, `8`).unwrap().line_col(), (`3`, `2`));
515	assert_eq!(Position::new(input, `11`).unwrap().line_col(), (`3`, `3`));
516	let input = "abcd嗨";
517	assert_eq!(Position::new(input, `7`).unwrap().line_col(), (`1`, `6`));
518	}
519
520	#[test]
521	fn line_of() {
522	let input = "a`\r`b`\n`c`\r\n`d嗨";
523
524	assert_eq!(Position::new(input, `0`).unwrap().line_of(), "a`\r`b`\n`");
525	assert_eq!(Position::new(input, `1`).unwrap().line_of(), "a`\r`b`\n`");
526	assert_eq!(Position::new(input, `2`).unwrap().line_of(), "a`\r`b`\n`");
527	assert_eq!(Position::new(input, `3`).unwrap().line_of(), "a`\r`b`\n`");
528	assert_eq!(Position::new(input, `4`).unwrap().line_of(), "c`\r\n`");
529	assert_eq!(Position::new(input, `5`).unwrap().line_of(), "c`\r\n`");
530	assert_eq!(Position::new(input, `6`).unwrap().line_of(), "c`\r\n`");
531	assert_eq!(Position::new(input, `7`).unwrap().line_of(), "d嗨");
532	assert_eq!(Position::new(input, `8`).unwrap().line_of(), "d嗨");
533	assert_eq!(Position::new(input, `11`).unwrap().line_of(), "d嗨");
534	}
535
536	#[test]
537	fn line_of_empty() {
538	let input = "";
539
540	assert_eq!(Position::new(input, `0`).unwrap().line_of(), "");
541	}
542
543	#[test]
544	fn line_of_new_line() {
545	let input = "`\n`";
546
547	assert_eq!(Position::new(input, `0`).unwrap().line_of(), "`\n`");
548	}
549
550	#[test]
551	fn line_of_between_new_line() {
552	let input = "`\n\n`";
553
554	assert_eq!(Position::new(input, `1`).unwrap().line_of(), "`\n`");
555	}
556
557	fn measure_skip(input: &str, pos: usize, n: usize) -> Option<usize> {
558	let mut p = Position::new(input, pos).unwrap();
559	if p.skip(n) {
560	Some(p.pos - pos)
561	} else {
562	None
563	}
564	}
565
566	#[test]
567	fn skip_empty() {
568	let input = "";
569
570	assert_eq!(measure_skip(input, `0`, `0`), Some(`0`));
571	assert_eq!(measure_skip(input, `0`, `1`), None);
572	}
573
574	#[test]
575	fn skip() {
576	let input = "d嗨";
577
578	assert_eq!(measure_skip(input, `0`, `0`), Some(`0`));
579	assert_eq!(measure_skip(input, `0`, `1`), Some(`1`));
580	assert_eq!(measure_skip(input, `1`, `1`), Some(`3`));
581	}
582
583	#[test]
584	fn skip_until() {
585	let input = "ab ac";
586	let pos = Position::from_start(input);
587
588	let mut test_pos = pos;
589	test_pos.skip_until(&["a", "b"]);
590	assert_eq!(test_pos.pos(), `0`);
591
592	test_pos = pos;
593	test_pos.skip_until(&["b"]);
594	assert_eq!(test_pos.pos(), `1`);
595
596	test_pos = pos;
597	test_pos.skip_until(&["ab"]);
598	assert_eq!(test_pos.pos(), `0`);
599
600	test_pos = pos;
601	test_pos.skip_until(&["ac", "z"]);
602	assert_eq!(test_pos.pos(), `3`);
603
604	test_pos = pos;
605	assert!(!test_pos.skip_until(&["z"]));
606	assert_eq!(test_pos.pos(), `5`);
607	}
608
609	#[test]
610	fn match_range() {
611	let input = "b";
612
613	assert!(Position::new(input, `0`).unwrap().match_range('a'..'c'));
614	assert!(Position::new(input, `0`).unwrap().match_range('b'..'b'));
615	assert!(!Position::new(input, `0`).unwrap().match_range('a'..'a'));
616	assert!(!Position::new(input, `0`).unwrap().match_range('c'..'c'));
617	assert!(Position::new(input, `0`).unwrap().match_range('a'..'嗨'));
618	}
619
620	#[test]
621	fn match_insensitive() {
622	let input = "AsdASdF";
623
624	assert!(Position::new(input, `0`).unwrap().match_insensitive("asd"));
625	assert!(Position::new(input, `3`).unwrap().match_insensitive("asdf"));
626	}
627
628	#[test]
629	fn cmp() {
630	let input = "a";
631	let start = Position::from_start(input);
632	let mut end = start;
633
634	assert!(end.skip(`1`));
635	let result = start.cmp(&end);
636
637	assert_eq!(result, Ordering::Less);
638	}
639
640	#[test]
641	#[should_panic]
642	fn cmp_panic() {
643	let input1 = "a";
644	let input2 = "b";
645	let pos1 = Position::from_start(input1);
646	let pos2 = Position::from_start(input2);
647
648	let _ = pos1.cmp(&pos2);
649	}
650
651	#[test]
652	#[cfg(feature = "std")]
653	fn hash() {
654	use std::collections::HashSet;
655
656	let input = "a";
657	let start = Position::from_start(input);
658	let mut positions = HashSet::new();
659
660	positions.insert(start);
661	}
662	}
663