position.rs source code [crates/pest-2.7.0/src/position.rs]

1	// pest. The Elegant Parser
2	// Copyright (c) 2018 Dragoș Tiselice
3	//
4	// Licensed under the Apache License, Version 2.0
5	// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6	// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7	// option. All files in the project carrying such notice may not be copied,
8	// modified, or distributed except according to those terms.
9
10	use core::cmp::Ordering;
11	use core::fmt;
12	use core::hash::{Hash, Hasher};
13	use core::ops::Range;
14	use core::ptr;
15	use core::str;
16
17	use crate::span;
18
19	/// A cursor position in a `&str` which provides useful methods to manually parse that string.
20	#[derive(Clone, Copy)]
21	pub struct Position<'i> {
22	input: &'i str,
23	/// # Safety:
24	///
25	/// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
26	pos: usize,
27	}
28
29	impl<'i> Position<'i> {
30	/// Create a new `Position` without checking invariants. (Checked with `debug_assertions`.)
31	///
32	/// # Safety:
33	///
34	/// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
35	pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position<'_> {
36	debug_assert!(input.get(pos..).is_some());
37	Position { input, pos }
38	}
39
40	/// Attempts to create a new `Position` at the given position. If the specified position is
41	/// an invalid index, or the specified position is not a valid UTF8 boundary, then None is
42	/// returned.
43	///
44	/// # Examples
45	/// ```
46	/// # use pest::Position;
47	/// let cheart = '💖';
48	/// let heart = "💖";
49	/// assert_eq!(Position::new(heart, `1`), None);
50	/// assert_ne!(Position::new(heart, cheart.len_utf8()), None);
51	/// ```
52	pub fn new(input: &str, pos: usize) -> Option<Position<'_>> {
53	input.get(pos..).map(\|_\| Position { input, pos })
54	}
55
56	/// Creates a `Position` at the start of a `&str`.
57	///
58	/// # Examples
59	///
60	/// ```
61	/// # use pest::Position;
62	/// let start = Position::from_start("");
63	/// assert_eq!(start.pos(), `0`);
64	/// ```
65	#[inline]
66	pub fn from_start(input: &'i str) -> Position<'i> {
67	// Position 0 is always safe because it's always a valid UTF-8 border.
68	Position { input, pos: `0` }
69	}
70
71	/// Returns the byte position of this `Position` as a `usize`.
72	///
73	/// # Examples
74	///
75	/// ```
76	/// # use pest::Position;
77	/// let input = "ab";
78	/// let mut start = Position::from_start(input);
79	///
80	/// assert_eq!(start.pos(), `0`);
81	/// ```
82	#[inline]
83	pub fn pos(&self) -> usize {
84	self.pos
85	}
86
87	/// Creates a `Span` from two `Position`s.
88	///
89	/// # Panics
90	///
91	/// Panics if the positions come from different inputs.
92	///
93	/// # Examples
94	///
95	/// ```
96	/// # use pest::Position;
97	/// let input = "ab";
98	/// let start = Position::from_start(input);
99	/// let span = start.span(&start.clone());
100	///
101	/// assert_eq!(span.start(), `0`);
102	/// assert_eq!(span.end(), `0`);
103	/// ```
104	#[inline]
105	pub fn span(&self, other: &Position<'i>) -> span::Span<'i> {
106	if ptr::eq(self.input, other.input)
107	/ && self.input.get(self.pos..other.pos).is_some() /
108	{
109	// This is safe because the pos field of a Position should always be a valid str index.
110	unsafe { span::Span::new_unchecked(self.input, self.pos, other.pos) }
111	} else {
112	// TODO: maybe a panic if self.pos < other.pos
113	panic!("span created from positions from different inputs")
114	}
115	}
116
117	/// Returns the line and column number of this `Position`.
118	///
119	/// This is an O(n) operation, where n is the number of chars in the input.
120	/// You better use [`pair.line_col()`](struct.Pair.html#method.line_col) instead.
121	///
122	/// # Examples
123	///
124	/// ```
125	/// # use pest;
126	/// # #[allow(non_camel_case_types)]
127	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
128	/// enum Rule {}
129	///
130	/// let input = "`\n`a";
131	/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
132	/// let mut result = state.match_string("`\n`a");
133	/// assert!(result.is_ok());
134	/// assert_eq!(result.unwrap().position().line_col(), (`2`, `2`));
135	/// ```
136	#[inline]
137	pub fn line_col(&self) -> (usize, usize) {
138	if self.pos > self.input.len() {
139	panic!("position out of bounds");
140	}
141	let mut pos = self.pos;
142	let slice = &self.input[..pos];
143	let mut chars = slice.chars().peekable();
144
145	let mut line_col = (`1`, `1`);
146
147	while pos != `0` {
148	match chars.next() {
149	Some('`\r`') => {
150	if let Some(&'`\n`') = chars.peek() {
151	chars.next();
152
153	if pos == `1` {
154	pos -= `1`;
155	} else {
156	pos -= `2`;
157	}
158
159	line_col = (line_col.0 + `1`, `1`);
160	} else {
161	pos -= `1`;
162	line_col = (line_col.0, line_col.1 + `1`);
163	}
164	}
165	Some('`\n`') => {
166	pos -= `1`;
167	line_col = (line_col.0 + `1`, `1`);
168	}
169	Some(c) => {
170	pos -= c.len_utf8();
171	line_col = (line_col.0, line_col.1 + `1`);
172	}
173	None => unreachable!(),
174	}
175	}
176
177	line_col
178	}
179
180	/// Returns the entire line of the input that contains this `Position`.
181	///
182	/// # Examples
183	///
184	/// ```
185	/// # use pest;
186	/// # #[allow(non_camel_case_types)]
187	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
188	/// enum Rule {}
189	///
190	/// let input = "`\n`a";
191	/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
192	/// let mut result = state.match_string("`\n`a");
193	/// assert!(result.is_ok());
194	/// assert_eq!(result.unwrap().position().line_of(), "a");
195	/// ```
196	#[inline]
197	pub fn line_of(&self) -> &'i str {
198	if self.pos > self.input.len() {
199	panic!("position out of bounds");
200	};
201	// Safe since start and end can only be valid UTF-8 borders.
202	&self.input[self.find_line_start()..self.find_line_end()]
203	}
204
205	pub(crate) fn find_line_start(&self) -> usize {
206	if self.input.is_empty() {
207	return `0`;
208	};
209	// Position's pos is always a UTF-8 border.
210	let start = self
211	.input
212	.char_indices()
213	.rev()
214	.skip_while(\|&(i, _)\| i >= self.pos)
215	.find(\|&(_, c)\| c == '`\n`');
216	match start {
217	Some((i, _)) => i + `1`,
218	None => `0`,
219	}
220	}
221
222	pub(crate) fn find_line_end(&self) -> usize {
223	if self.input.is_empty() {
224	`0`
225	} else if self.pos == self.input.len() - `1` {
226	self.input.len()
227	} else {
228	// Position's pos is always a UTF-8 border.
229	let end = self
230	.input
231	.char_indices()
232	.skip_while(\|&(i, _)\| i < self.pos)
233	.find(\|&(_, c)\| c == '`\n`');
234	match end {
235	Some((i, _)) => i + `1`,
236	None => self.input.len(),
237	}
238	}
239	}
240
241	/// Returns `true` when the `Position` points to the start of the input `&str`.
242	#[inline]
243	pub(crate) fn at_start(&self) -> bool {
244	self.pos == `0`
245	}
246
247	/// Returns `true` when the `Position` points to the end of the input `&str`.
248	#[inline]
249	pub(crate) fn at_end(&self) -> bool {
250	self.pos == self.input.len()
251	}
252
253	/// Skips `n` `char`s from the `Position` and returns `true` if the skip was possible or `false`
254	/// otherwise. If the return value is `false`, `pos` will not be updated.
255	#[inline]
256	pub(crate) fn skip(&mut self, n: usize) -> bool {
257	let skipped = {
258	let mut len = `0`;
259	// Position's pos is always a UTF-8 border.
260	let mut chars = self.input[self.pos..].chars();
261	for _ in `0`..n {
262	if let Some(c) = chars.next() {
263	len += c.len_utf8();
264	} else {
265	return `false`;
266	}
267	}
268	len
269	};
270
271	self.pos += skipped;
272	`true`
273	}
274
275	/// Goes back `n` `char`s from the `Position` and returns `true` if the skip was possible or `false`
276	/// otherwise. If the return value is `false`, `pos` will not be updated.
277	#[inline]
278	pub(crate) fn skip_back(&mut self, n: usize) -> bool {
279	let skipped = {
280	let mut len = `0`;
281	// Position's pos is always a UTF-8 border.
282	let mut chars = self.input[..self.pos].chars().rev();
283	for _ in `0`..n {
284	if let Some(c) = chars.next() {
285	len += c.len_utf8();
286	} else {
287	return `false`;
288	}
289	}
290	len
291	};
292
293	self.pos -= skipped;
294	`true`
295	}
296
297	/// Skips until one of the given `strings` is found. If none of the `strings` can be found,
298	/// this function will return `false` but its `pos` will still* be updated.*
299	#[inline]
300	pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool {
301	#[cfg(not(feature = "memchr"))]
302	{
303	self.skip_until_basic(strings)
304	}
305	#[cfg(feature = "memchr")]
306	{
307	match strings {
308	[] => (),
309	[s1] => {
310	if let Some(from) =
311	memchr::memmem::find(&self.input.as_bytes()[self.pos..], s1.as_bytes())
312	{
313	self.pos += from;
314	return `true`;
315	}
316	}
317	[s1, s2] if !s1.is_empty() && !s2.is_empty() => {
318	let b1 = s1.as_bytes()[`0`];
319	let b2 = s2.as_bytes()[`0`];
320	let miter = memchr::memchr2_iter(b1, b2, &self.input.as_bytes()[self.pos..]);
321	for from in miter {
322	let start = &self.input[self.pos + from..];
323	if start.starts_with(s1) \|\| start.starts_with(s2) {
324	self.pos += from;
325	return `true`;
326	}
327	}
328	}
329	[s1, s2, s3] if !s1.is_empty() && !s2.is_empty() && s3.is_empty() => {
330	let b1 = s1.as_bytes()[`0`];
331	let b2 = s2.as_bytes()[`0`];
332	let b3 = s2.as_bytes()[`0`];
333	let miter =
334	memchr::memchr3_iter(b1, b2, b3, &self.input.as_bytes()[self.pos..]);
335	for from in miter {
336	let start = &self.input[self.pos + from..];
337	if start.starts_with(s1) \|\| start.starts_with(s2) \|\| start.starts_with(s3) {
338	self.pos += from;
339	return `true`;
340	}
341	}
342	}
343	_ => {
344	return self.skip_until_basic(strings);
345	}
346	}
347	self.pos = self.input.len();
348	`false`
349	}
350	}
351
352	#[inline]
353	fn skip_until_basic(&mut self, strings: &[&str]) -> bool {
354	// TODO: optimize with Aho-Corasick, e.g. https://crates.io/crates/daachorse?
355	for from in self.pos..self.input.len() {
356	let bytes = if let Some(string) = self.input.get(from..) {
357	string.as_bytes()
358	} else {
359	continue;
360	};
361
362	for slice in strings.iter() {
363	let to = slice.len();
364	if Some(slice.as_bytes()) == bytes.get(`0`..to) {
365	self.pos = from;
366	return `true`;
367	}
368	}
369	}
370
371	self.pos = self.input.len();
372	`false`
373	}
374
375	/// Matches the char at the `Position` against a specified character and returns `true` if a match
376	/// was made. If no match was made, returns `false`.
377	/// `pos` will not be updated in either case.
378	#[inline]
379	pub(crate) fn match_char(&self, c: char) -> bool {
380	matches!(self.input[self.pos..].chars().next(), Some(cc) if c == cc)
381	}
382
383	/// Matches the char at the `Position` against a filter function and returns `true` if a match
384	/// was made. If no match was made, returns `false` and `pos` will not be updated.
385	#[inline]
386	pub(crate) fn match_char_by<F>(&mut self, f: F) -> bool
387	where
388	F: FnOnce(char) -> bool,
389	{
390	if let Some(c) = self.input[self.pos..].chars().next() {
391	if f(c) {
392	self.pos += c.len_utf8();
393	`true`
394	} else {
395	`false`
396	}
397	} else {
398	`false`
399	}
400	}
401
402	/// Matches `string` from the `Position` and returns `true` if a match was made or `false`
403	/// otherwise. If no match was made, `pos` will not be updated.
404	#[inline]
405	pub(crate) fn match_string(&mut self, string: &str) -> bool {
406	let to = self.pos + string.len();
407
408	if Some(string.as_bytes()) == self.input.as_bytes().get(self.pos..to) {
409	self.pos = to;
410	`true`
411	} else {
412	`false`
413	}
414	}
415
416	/// Case-insensitively matches `string` from the `Position` and returns `true` if a match was
417	/// made or `false` otherwise. If no match was made, `pos` will not be updated.
418	#[inline]
419	pub(crate) fn match_insensitive(&mut self, string: &str) -> bool {
420	let matched = {
421	let slice = &self.input[self.pos..];
422	if let Some(slice) = slice.get(`0`..string.len()) {
423	slice.eq_ignore_ascii_case(string)
424	} else {
425	`false`
426	}
427	};
428
429	if matched {
430	self.pos += string.len();
431	`true`
432	} else {
433	`false`
434	}
435	}
436
437	/// Matches `char` `range` from the `Position` and returns `true` if a match was made or `false`
438	/// otherwise. If no match was made, `pos` will not be updated.
439	#[inline]
440	pub(crate) fn match_range(&mut self, range: Range<char>) -> bool {
441	if let Some(c) = self.input[self.pos..].chars().next() {
442	if range.start <= c && c <= range.end {
443	self.pos += c.len_utf8();
444	return `true`;
445	}
446	}
447
448	`false`
449	}
450	}
451
452	impl<'i> fmt::Debug for Position<'i> {
453	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
454	f.debug_struct("Position").field(name:"pos", &self.pos).finish()
455	}
456	}
457
458	impl<'i> PartialEq for Position<'i> {
459	fn eq(&self, other: &Position<'i>) -> bool {
460	ptr::eq(self.input, b:other.input) && self.pos == other.pos
461	}
462	}
463
464	impl<'i> Eq for Position<'i> {}
465
466	impl<'i> PartialOrd for Position<'i> {
467	fn partial_cmp(&self, other: &Position<'i>) -> Option<Ordering> {
468	if ptr::eq(self.input, b:other.input) {
469	self.pos.partial_cmp(&other.pos)
470	} else {
471	None
472	}
473	}
474	}
475
476	impl<'i> Ord for Position<'i> {
477	fn cmp(&self, other: &Position<'i>) -> Ordering {
478	self.partial_cmp(other)
479	.expect(msg:"cannot compare positions from different strs")
480	}
481	}
482
483	impl<'i> Hash for Position<'i> {
484	fn hash<H: Hasher>(&self, state: &mut H) {
485	(self.input as *const str).hash(state);
486	self.pos.hash(state);
487	}
488	}
489
490	#[cfg(test)]
491	mod tests {
492	use super::*;
493
494	#[test]
495	fn empty() {
496	let input = "";
497	assert!(Position::new(input, `0`).unwrap().match_string(""));
498	assert!(!Position::new(input, `0`).unwrap().match_string("a"));
499	}
500
501	#[test]
502	fn parts() {
503	let input = "asdasdf";
504
505	assert!(Position::new(input, `0`).unwrap().match_string("asd"));
506	assert!(Position::new(input, `3`).unwrap().match_string("asdf"));
507	}
508
509	#[test]
510	fn line_col() {
511	let input = "a`\r`b`\n`c`\r\n`d嗨";
512
513	assert_eq!(Position::new(input, `0`).unwrap().line_col(), (`1`, `1`));
514	assert_eq!(Position::new(input, `1`).unwrap().line_col(), (`1`, `2`));
515	assert_eq!(Position::new(input, `2`).unwrap().line_col(), (`1`, `3`));
516	assert_eq!(Position::new(input, `3`).unwrap().line_col(), (`1`, `4`));
517	assert_eq!(Position::new(input, `4`).unwrap().line_col(), (`2`, `1`));
518	assert_eq!(Position::new(input, `5`).unwrap().line_col(), (`2`, `2`));
519	assert_eq!(Position::new(input, `6`).unwrap().line_col(), (`2`, `3`));
520	assert_eq!(Position::new(input, `7`).unwrap().line_col(), (`3`, `1`));
521	assert_eq!(Position::new(input, `8`).unwrap().line_col(), (`3`, `2`));
522	assert_eq!(Position::new(input, `11`).unwrap().line_col(), (`3`, `3`));
523	let input = "abcd嗨";
524	assert_eq!(Position::new(input, `7`).unwrap().line_col(), (`1`, `6`));
525	}
526
527	#[test]
528	fn line_of() {
529	let input = "a`\r`b`\n`c`\r\n`d嗨";
530
531	assert_eq!(Position::new(input, `0`).unwrap().line_of(), "a`\r`b`\n`");
532	assert_eq!(Position::new(input, `1`).unwrap().line_of(), "a`\r`b`\n`");
533	assert_eq!(Position::new(input, `2`).unwrap().line_of(), "a`\r`b`\n`");
534	assert_eq!(Position::new(input, `3`).unwrap().line_of(), "a`\r`b`\n`");
535	assert_eq!(Position::new(input, `4`).unwrap().line_of(), "c`\r\n`");
536	assert_eq!(Position::new(input, `5`).unwrap().line_of(), "c`\r\n`");
537	assert_eq!(Position::new(input, `6`).unwrap().line_of(), "c`\r\n`");
538	assert_eq!(Position::new(input, `7`).unwrap().line_of(), "d嗨");
539	assert_eq!(Position::new(input, `8`).unwrap().line_of(), "d嗨");
540	assert_eq!(Position::new(input, `11`).unwrap().line_of(), "d嗨");
541	}
542
543	#[test]
544	fn line_of_empty() {
545	let input = "";
546
547	assert_eq!(Position::new(input, `0`).unwrap().line_of(), "");
548	}
549
550	#[test]
551	fn line_of_new_line() {
552	let input = "`\n`";
553
554	assert_eq!(Position::new(input, `0`).unwrap().line_of(), "`\n`");
555	}
556
557	#[test]
558	fn line_of_between_new_line() {
559	let input = "`\n\n`";
560
561	assert_eq!(Position::new(input, `1`).unwrap().line_of(), "`\n`");
562	}
563
564	fn measure_skip(input: &str, pos: usize, n: usize) -> Option<usize> {
565	let mut p = Position::new(input, pos).unwrap();
566	if p.skip(n) {
567	Some(p.pos - pos)
568	} else {
569	None
570	}
571	}
572
573	#[test]
574	fn skip_empty() {
575	let input = "";
576
577	assert_eq!(measure_skip(input, `0`, `0`), Some(`0`));
578	assert_eq!(measure_skip(input, `0`, `1`), None);
579	}
580
581	#[test]
582	fn skip() {
583	let input = "d嗨";
584
585	assert_eq!(measure_skip(input, `0`, `0`), Some(`0`));
586	assert_eq!(measure_skip(input, `0`, `1`), Some(`1`));
587	assert_eq!(measure_skip(input, `1`, `1`), Some(`3`));
588	}
589
590	#[test]
591	fn skip_until() {
592	let input = "ab ac";
593	let pos = Position::from_start(input);
594
595	let mut test_pos = pos;
596	test_pos.skip_until(&["a", "b"]);
597	assert_eq!(test_pos.pos(), `0`);
598
599	test_pos = pos;
600	test_pos.skip_until(&["b"]);
601	assert_eq!(test_pos.pos(), `1`);
602
603	test_pos = pos;
604	test_pos.skip_until(&["ab"]);
605	assert_eq!(test_pos.pos(), `0`);
606
607	test_pos = pos;
608	test_pos.skip_until(&["ac", "z"]);
609	assert_eq!(test_pos.pos(), `3`);
610
611	test_pos = pos;
612	assert!(!test_pos.skip_until(&["z"]));
613	assert_eq!(test_pos.pos(), `5`);
614	}
615
616	#[test]
617	fn match_range() {
618	let input = "b";
619
620	assert!(Position::new(input, `0`).unwrap().match_range('a'..'c'));
621	assert!(Position::new(input, `0`).unwrap().match_range('b'..'b'));
622	assert!(!Position::new(input, `0`).unwrap().match_range('a'..'a'));
623	assert!(!Position::new(input, `0`).unwrap().match_range('c'..'c'));
624	assert!(Position::new(input, `0`).unwrap().match_range('a'..'嗨'));
625	}
626
627	#[test]
628	fn match_insensitive() {
629	let input = "AsdASdF";
630
631	assert!(Position::new(input, `0`).unwrap().match_insensitive("asd"));
632	assert!(Position::new(input, `3`).unwrap().match_insensitive("asdf"));
633	}
634
635	#[test]
636	fn cmp() {
637	let input = "a";
638	let start = Position::from_start(input);
639	let mut end = start;
640
641	assert!(end.skip(`1`));
642	let result = start.cmp(&end);
643
644	assert_eq!(result, Ordering::Less);
645	}
646
647	#[test]
648	#[should_panic]
649	fn cmp_panic() {
650	let input1 = "a";
651	let input2 = "b";
652	let pos1 = Position::from_start(input1);
653	let pos2 = Position::from_start(input2);
654
655	let _ = pos1.cmp(&pos2);
656	}
657
658	#[test]
659	#[cfg(feature = "std")]
660	fn hash() {
661	use std::collections::HashSet;
662
663	let input = "a";
664	let start = Position::from_start(input);
665	let mut positions = HashSet::new();
666
667	positions.insert(start);
668	}
669	}
670