// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
9 | |
10 | use core::fmt; |
11 | use core::hash::{Hash, Hasher}; |
12 | use core::ops::{Bound, RangeBounds}; |
13 | use core::ptr; |
14 | use core::str; |
15 | |
16 | use crate::position; |
17 | |
/// A span over a `&str`. It is created from either [two `Position`s] or from a [`Pair`].
///
/// A span stores the complete input string together with a `start..end` byte range into it,
/// so it is cheap to copy and can always hand back either the covered slice or the full input.
///
/// [two `Position`s]: struct.Position.html#method.span
/// [`Pair`]: ../iterators/struct.Pair.html#method.span
#[derive(Clone, Copy)]
pub struct Span<'i> {
    /// The complete input string the span points into (not just the covered slice).
    input: &'i str,
    /// Byte offset into `input` where the span starts.
    start: usize,
    /// Byte offset into `input` where the span ends.
    end: usize,
}
28 | |
29 | impl<'i> Span<'i> { |
30 | /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.) |
31 | pub(crate) fn new_internal(input: &str, start: usize, end: usize) -> Span<'_> { |
32 | debug_assert!(input.get(start..end).is_some()); |
33 | Span { input, start, end } |
34 | } |
35 | |
36 | /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index |
37 | /// into `input`. |
38 | /// |
39 | /// # Examples |
40 | /// |
41 | /// ``` |
42 | /// # use pest::Span; |
43 | /// let input = "Hello!" ; |
44 | /// assert_eq!(None, Span::new(input, 100, 0)); |
45 | /// assert!(Span::new(input, 0, input.len()).is_some()); |
46 | /// ``` |
47 | pub fn new(input: &str, start: usize, end: usize) -> Option<Span<'_>> { |
48 | if input.get(start..end).is_some() { |
49 | Some(Span { input, start, end }) |
50 | } else { |
51 | None |
52 | } |
53 | } |
54 | |
55 | /// Attempts to create a new span based on a sub-range. |
56 | /// |
57 | /// ``` |
58 | /// use pest::Span; |
59 | /// let input = "Hello World!" ; |
60 | /// let world = Span::new(input, 6, input.len()).unwrap(); |
61 | /// let orl = world.get(1..=3); |
62 | /// assert!(orl.is_some()); |
63 | /// assert_eq!(orl.unwrap().as_str(), "orl" ); |
64 | /// ``` |
65 | /// |
66 | /// # Examples |
67 | pub fn get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>> { |
68 | let start = match range.start_bound() { |
69 | Bound::Included(offset) => *offset, |
70 | Bound::Excluded(offset) => *offset + 1, |
71 | Bound::Unbounded => 0, |
72 | }; |
73 | let end = match range.end_bound() { |
74 | Bound::Included(offset) => *offset + 1, |
75 | Bound::Excluded(offset) => *offset, |
76 | Bound::Unbounded => self.as_str().len(), |
77 | }; |
78 | |
79 | self.as_str().get(start..end).map(|_| Span { |
80 | input: self.input, |
81 | start: self.start + start, |
82 | end: self.start + end, |
83 | }) |
84 | } |
85 | |
86 | /// Returns the `Span`'s start byte position as a `usize`. |
87 | /// |
88 | /// # Examples |
89 | /// |
90 | /// ``` |
91 | /// # use pest::Position; |
92 | /// let input = "ab" ; |
93 | /// let start = Position::from_start(input); |
94 | /// let end = start.clone(); |
95 | /// let span = start.span(&end); |
96 | /// |
97 | /// assert_eq!(span.start(), 0); |
98 | /// ``` |
99 | #[inline ] |
100 | pub fn start(&self) -> usize { |
101 | self.start |
102 | } |
103 | |
104 | /// Returns the `Span`'s end byte position as a `usize`. |
105 | /// |
106 | /// # Examples |
107 | /// |
108 | /// ``` |
109 | /// # use pest::Position; |
110 | /// let input = "ab" ; |
111 | /// let start = Position::from_start(input); |
112 | /// let end = start.clone(); |
113 | /// let span = start.span(&end); |
114 | /// |
115 | /// assert_eq!(span.end(), 0); |
116 | /// ``` |
117 | #[inline ] |
118 | pub fn end(&self) -> usize { |
119 | self.end |
120 | } |
121 | |
122 | /// Returns the `Span`'s start `Position`. |
123 | /// |
124 | /// # Examples |
125 | /// |
126 | /// ``` |
127 | /// # use pest::Position; |
128 | /// let input = "ab" ; |
129 | /// let start = Position::from_start(input); |
130 | /// let end = start.clone(); |
131 | /// let span = start.clone().span(&end); |
132 | /// |
133 | /// assert_eq!(span.start_pos(), start); |
134 | /// ``` |
135 | #[inline ] |
136 | pub fn start_pos(&self) -> position::Position<'i> { |
137 | position::Position::new_internal(self.input, self.start) |
138 | } |
139 | |
140 | /// Returns the `Span`'s end `Position`. |
141 | /// |
142 | /// # Examples |
143 | /// |
144 | /// ``` |
145 | /// # use pest::Position; |
146 | /// let input = "ab" ; |
147 | /// let start = Position::from_start(input); |
148 | /// let end = start.clone(); |
149 | /// let span = start.span(&end); |
150 | /// |
151 | /// assert_eq!(span.end_pos(), end); |
152 | /// ``` |
153 | #[inline ] |
154 | pub fn end_pos(&self) -> position::Position<'i> { |
155 | position::Position::new_internal(self.input, self.end) |
156 | } |
157 | |
158 | /// Splits the `Span` into a pair of `Position`s. |
159 | /// |
160 | /// # Examples |
161 | /// |
162 | /// ``` |
163 | /// # use pest::Position; |
164 | /// let input = "ab" ; |
165 | /// let start = Position::from_start(input); |
166 | /// let end = start.clone(); |
167 | /// let span = start.clone().span(&end); |
168 | /// |
169 | /// assert_eq!(span.split(), (start, end)); |
170 | /// ``` |
171 | #[inline ] |
172 | pub fn split(self) -> (position::Position<'i>, position::Position<'i>) { |
173 | let pos1 = position::Position::new_internal(self.input, self.start); |
174 | let pos2 = position::Position::new_internal(self.input, self.end); |
175 | |
176 | (pos1, pos2) |
177 | } |
178 | |
179 | /// Captures a slice from the `&str` defined by the `Span`. |
180 | /// |
181 | /// # Examples |
182 | /// |
183 | /// ``` |
184 | /// # use pest; |
185 | /// # #[allow (non_camel_case_types)] |
186 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
187 | /// enum Rule {} |
188 | /// |
189 | /// let input = "abc" ; |
190 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(1).unwrap(); |
191 | /// let start_pos = state.position().clone(); |
192 | /// state = state.match_string("b" ).unwrap(); |
193 | /// let span = start_pos.span(&state.position().clone()); |
194 | /// assert_eq!(span.as_str(), "b" ); |
195 | /// ``` |
196 | #[inline ] |
197 | pub fn as_str(&self) -> &'i str { |
198 | // Span's start and end positions are always a UTF-8 borders. |
199 | &self.input[self.start..self.end] |
200 | } |
201 | |
202 | /// Returns the input string of the `Span`. |
203 | /// |
204 | /// This function returns the input string of the `Span` as a `&str`. This is the source string |
205 | /// from which the `Span` was created. The returned `&str` can be used to examine the contents of |
206 | /// the `Span` or to perform further processing on the string. |
207 | /// |
208 | /// # Examples |
209 | /// |
210 | /// ``` |
211 | /// # use pest; |
212 | /// # use pest::Span; |
213 | /// |
214 | /// // Example: Get input string from a span |
215 | /// let input = "abc \ndef \nghi" ; |
216 | /// let span = Span::new(input, 1, 7).unwrap(); |
217 | /// assert_eq!(span.get_input(), input); |
218 | /// ``` |
219 | pub fn get_input(&self) -> &'i str { |
220 | self.input |
221 | } |
222 | |
223 | /// Iterates over all lines (partially) covered by this span. Yielding a `&str` for each line. |
224 | /// |
225 | /// # Examples |
226 | /// |
227 | /// ``` |
228 | /// # use pest; |
229 | /// # #[allow (non_camel_case_types)] |
230 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
231 | /// enum Rule {} |
232 | /// |
233 | /// let input = "a \nb \nc" ; |
234 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap(); |
235 | /// let start_pos = state.position().clone(); |
236 | /// state = state.match_string("b \nc" ).unwrap(); |
237 | /// let span = start_pos.span(&state.position().clone()); |
238 | /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b \n" , "c" ]); |
239 | /// ``` |
240 | #[inline ] |
241 | pub fn lines(&self) -> Lines<'_> { |
242 | Lines { |
243 | inner: self.lines_span(), |
244 | } |
245 | } |
246 | |
247 | /// Iterates over all lines (partially) covered by this span. Yielding a `Span` for each line. |
248 | /// |
249 | /// # Examples |
250 | /// |
251 | /// ``` |
252 | /// # use pest; |
253 | /// # use pest::Span; |
254 | /// # #[allow (non_camel_case_types)] |
255 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
256 | /// enum Rule {} |
257 | /// |
258 | /// let input = "a \nb \nc" ; |
259 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap(); |
260 | /// let start_pos = state.position().clone(); |
261 | /// state = state.match_string("b \nc" ).unwrap(); |
262 | /// let span = start_pos.span(&state.position().clone()); |
263 | /// assert_eq!(span.lines_span().collect::<Vec<_>>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]); |
264 | /// ``` |
265 | pub fn lines_span(&self) -> LinesSpan<'_> { |
266 | LinesSpan { |
267 | span: self, |
268 | pos: self.start, |
269 | } |
270 | } |
271 | } |
272 | |
273 | impl<'i> fmt::Debug for Span<'i> { |
274 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
275 | f&mut DebugStruct<'_, '_>.debug_struct("Span" ) |
276 | .field("str" , &self.as_str()) |
277 | .field("start" , &self.start) |
278 | .field(name:"end" , &self.end) |
279 | .finish() |
280 | } |
281 | } |
282 | |
283 | impl<'i> PartialEq for Span<'i> { |
284 | fn eq(&self, other: &Span<'i>) -> bool { |
285 | ptr::eq(self.input, b:other.input) && self.start == other.start && self.end == other.end |
286 | } |
287 | } |
288 | |
289 | impl<'i> Eq for Span<'i> {} |
290 | |
291 | impl<'i> Hash for Span<'i> { |
292 | fn hash<H: Hasher>(&self, state: &mut H) { |
293 | (self.input as *const str).hash(state); |
294 | self.start.hash(state); |
295 | self.end.hash(state); |
296 | } |
297 | } |
298 | |
299 | /// Merges two spans into one. |
300 | /// |
301 | /// This function merges two spans that are contiguous or overlapping into a single span |
302 | /// that covers the entire range of the two input spans. This is useful when you want to |
303 | /// aggregate information from multiple spans into a single entity. |
304 | /// |
305 | /// The function checks if the input spans are overlapping or contiguous by comparing their |
306 | /// start and end positions. If they are, a new span is created with the minimum start position |
307 | /// and the maximum end position of the two input spans. |
308 | /// |
309 | /// If the input spans are neither overlapping nor contiguous, the function returns None, |
310 | /// indicating that a merge operation was not possible. |
311 | /// |
312 | /// # Examples |
313 | /// |
314 | /// ``` |
315 | /// # use pest; |
316 | /// # use pest::Span; |
317 | /// # use pest::merge_spans; |
318 | /// |
319 | /// // Example 1: Contiguous spans |
320 | /// let input = "abc \ndef \nghi" ; |
321 | /// let span1 = Span::new(input, 1, 7).unwrap(); |
322 | /// let span2 = Span::new(input, 7, 11).unwrap(); |
323 | /// let merged = merge_spans(&span1, &span2).unwrap(); |
324 | /// assert_eq!(merged, Span::new(input, 1, 11).unwrap()); |
325 | /// |
326 | /// // Example 2: Overlapping spans |
327 | /// let input = "abc \ndef \nghi" ; |
328 | /// let span1 = Span::new(input, 1, 7).unwrap(); |
329 | /// let span2 = Span::new(input, 5, 11).unwrap(); |
330 | /// let merged = merge_spans(&span1, &span2).unwrap(); |
331 | /// assert_eq!(merged, Span::new(input, 1, 11).unwrap()); |
332 | /// |
333 | /// // Example 3: Non-contiguous spans |
334 | /// let input = "abc \ndef \nghi" ; |
335 | /// let span1 = Span::new(input, 1, 7).unwrap(); |
336 | /// let span2 = Span::new(input, 8, 11).unwrap(); |
337 | /// let merged = merge_spans(&span1, &span2); |
338 | /// assert!(merged.is_none()); |
339 | /// ``` |
340 | pub fn merge_spans<'i>(a: &Span<'i>, b: &Span<'i>) -> Option<Span<'i>> { |
341 | if a.end() >= b.start() && a.start() <= b.end() { |
342 | // The spans overlap or are contiguous, so they can be merged. |
343 | Span::new( |
344 | a.get_input(), |
345 | start:core::cmp::min(a.start(), b.start()), |
346 | end:core::cmp::max(v1:a.end(), v2:b.end()), |
347 | ) |
348 | } else { |
349 | // The spans don't overlap and aren't contiguous, so they can't be merged. |
350 | None |
351 | } |
352 | } |
353 | |
354 | /// Line iterator for Spans, created by [`Span::lines_span()`]. |
355 | /// |
356 | /// Iterates all lines that are at least _partially_ covered by the span. Yielding a `Span` for each. |
357 | /// |
358 | /// [`Span::lines_span()`]: struct.Span.html#method.lines_span |
359 | pub struct LinesSpan<'i> { |
360 | span: &'i Span<'i>, |
361 | pos: usize, |
362 | } |
363 | |
364 | impl<'i> Iterator for LinesSpan<'i> { |
365 | type Item = Span<'i>; |
366 | fn next(&mut self) -> Option<Self::Item> { |
367 | if self.pos > self.span.end { |
368 | return None; |
369 | } |
370 | let pos: Position<'_> = position::Position::new(self.span.input, self.pos)?; |
371 | if pos.at_end() { |
372 | return None; |
373 | } |
374 | |
375 | let line_start: usize = pos.find_line_start(); |
376 | self.pos = pos.find_line_end(); |
377 | |
378 | Span::new(self.span.input, line_start, self.pos) |
379 | } |
380 | } |
381 | |
382 | /// Line iterator for Spans, created by [`Span::lines()`]. |
383 | /// |
384 | /// Iterates all lines that are at least _partially_ covered by the span. Yielding a `&str` for each. |
385 | /// |
386 | /// [`Span::lines()`]: struct.Span.html#method.lines |
387 | pub struct Lines<'i> { |
388 | inner: LinesSpan<'i>, |
389 | } |
390 | |
391 | impl<'i> Iterator for Lines<'i> { |
392 | type Item = &'i str; |
393 | fn next(&mut self) -> Option<Self::Item> { |
394 | self.inner.next().map(|span: Span<'i>| span.as_str()) |
395 | } |
396 | } |
397 | |
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// `Span::get` resolves ranges relative to the span while keeping the original input.
    #[test]
    fn get() {
        let input = "abc123abc";
        let span = Span::new(input, 3, input.len()).unwrap();
        assert_eq!(span.as_str(), "123abc");
        assert_eq!(span.input, input);

        let span1 = span.get(..=2);
        assert!(span1.is_some());
        assert_eq!(span1.unwrap().input, input);
        assert_eq!(span1.unwrap().as_str(), "123");

        let span2 = span.get(..);
        assert!(span2.is_some());
        assert_eq!(span2.unwrap().input, input);
        assert_eq!(span2.unwrap().as_str(), "123abc");

        let span3 = span.get(3..);
        assert!(span3.is_some());
        assert_eq!(span3.unwrap().input, input);
        assert_eq!(span3.unwrap().as_str(), "abc");

        let span4 = span.get(0..0);
        assert!(span4.is_some());
        assert_eq!(span4.unwrap().input, input);
        assert_eq!(span4.unwrap().as_str(), "");
    }

    /// Out-of-bounds ranges yield `None` instead of panicking.
    #[test]
    fn get_fails() {
        let input = "abc";
        let span = Span::new(input, 0, input.len()).unwrap();

        let span1 = span.get(0..100);
        assert!(span1.is_none());

        let span2 = span.get(100..200);
        assert!(span2.is_none());
    }

    /// Invalid ranges are rejected and spans with different offsets compare unequal.
    #[test]
    fn span_comp() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 50, 51);
        assert!(span2.is_none());
        let span3 = Span::new(input, 0, 8).unwrap();
        assert!(span != span3);
    }

    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        let mut end = start;

        assert!(end.skip(1));

        let span = start.clone().span(&end.clone());

        assert_eq!(span.split(), (start, end));
    }

    /// A span starting and ending mid-line still yields the complete lines it touches.
    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|span| span.as_str()).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "abc\n");
        assert_eq!(lines[1], "def\n");
        assert_eq!(lines, lines_span); // Verify parity with lines_span()
    }

    /// A span reaching the end of the input yields the unterminated last line.
    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        assert_eq!(span.end(), 11);
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|span| span.as_str()).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "def\n");
        assert_eq!(lines[1], "ghi");
        assert_eq!(lines, lines_span); // Verify parity with lines_span()
    }

    #[test]
    fn lines_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines_span: Vec<_> = span.lines_span().collect();
        let lines: Vec<_> = span.lines().collect();

        assert_eq!(lines_span.len(), 2);
        assert_eq!(lines_span[0], Span::new(input, 0, 4).unwrap());
        assert_eq!(lines_span[1], Span::new(input, 4, 8).unwrap());
        assert_eq!(
            lines_span
                .iter()
                .map(|span| span.as_str())
                .collect::<Vec<_>>(),
            lines
        );
    }

    #[test]
    fn get_input_of_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();

        assert_eq!(span.get_input(), input);
    }

    #[test]
    fn merge_contiguous() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 7, 11).unwrap();
        let merged = merge_spans(&span1, &span2).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    #[test]
    fn merge_overlapping() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 5, 11).unwrap();
        let merged = merge_spans(&span1, &span2).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    #[test]
    fn merge_non_contiguous() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 8, 11).unwrap();
        let merged = merge_spans(&span1, &span2);

        assert!(merged.is_none());
    }
}
552 | |