1 | // pest. The Elegant Parser |
2 | // Copyright (c) 2018 Dragoș Tiselice |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. All files in the project carrying such notice may not be copied, |
8 | // modified, or distributed except according to those terms. |
9 | |
10 | use core::fmt; |
11 | use core::hash::{Hash, Hasher}; |
12 | use core::ops::{Bound, RangeBounds}; |
13 | use core::ptr; |
14 | use core::str; |
15 | |
16 | use crate::position; |
17 | |
/// A region of a `&str`, delimited by a start and an end byte offset.
///
/// A `Span` is obtained either from [two `Position`s] or from a [`Pair`].
///
/// [two `Position`s]: struct.Position.html#method.span
/// [`Pair`]: ../iterators/struct.Pair.html#method.span
#[derive(Copy, Clone)]
pub struct Span<'i> {
    /// The complete input string that `start` and `end` index into.
    input: &'i str,
    /// Byte offset at which the span begins.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    start: usize,
    /// Byte offset at which the span stops (exclusive).
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    end: usize,
}
34 | |
35 | impl<'i> Span<'i> { |
36 | /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.) |
37 | /// |
38 | /// # Safety |
39 | /// |
40 | /// `input[start..end]` must be a valid subslice; that is, said indexing should not panic. |
41 | pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span<'_> { |
42 | debug_assert!(input.get(start..end).is_some()); |
43 | Span { input, start, end } |
44 | } |
45 | |
46 | /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index |
47 | /// into `input`. |
48 | /// |
49 | /// # Examples |
50 | /// |
51 | /// ``` |
52 | /// # use pest::Span; |
53 | /// let input = "Hello!" ; |
54 | /// assert_eq!(None, Span::new(input, 100, 0)); |
55 | /// assert!(Span::new(input, 0, input.len()).is_some()); |
56 | /// ``` |
57 | pub fn new(input: &str, start: usize, end: usize) -> Option<Span<'_>> { |
58 | if input.get(start..end).is_some() { |
59 | Some(Span { input, start, end }) |
60 | } else { |
61 | None |
62 | } |
63 | } |
64 | |
65 | /// Attempts to create a new span based on a sub-range. |
66 | /// |
67 | /// ``` |
68 | /// use pest::Span; |
69 | /// let input = "Hello World!" ; |
70 | /// let world = Span::new(input, 6, input.len()).unwrap(); |
71 | /// let orl = world.get(1..=3); |
72 | /// assert!(orl.is_some()); |
73 | /// assert_eq!(orl.unwrap().as_str(), "orl" ); |
74 | /// ``` |
75 | /// |
76 | /// # Examples |
77 | pub fn get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>> { |
78 | let start = match range.start_bound() { |
79 | Bound::Included(offset) => *offset, |
80 | Bound::Excluded(offset) => *offset + 1, |
81 | Bound::Unbounded => 0, |
82 | }; |
83 | let end = match range.end_bound() { |
84 | Bound::Included(offset) => *offset + 1, |
85 | Bound::Excluded(offset) => *offset, |
86 | Bound::Unbounded => self.as_str().len(), |
87 | }; |
88 | |
89 | self.as_str().get(start..end).map(|_| Span { |
90 | input: self.input, |
91 | start: self.start + start, |
92 | end: self.start + end, |
93 | }) |
94 | } |
95 | |
96 | /// Returns the `Span`'s start byte position as a `usize`. |
97 | /// |
98 | /// # Examples |
99 | /// |
100 | /// ``` |
101 | /// # use pest::Position; |
102 | /// let input = "ab" ; |
103 | /// let start = Position::from_start(input); |
104 | /// let end = start.clone(); |
105 | /// let span = start.span(&end); |
106 | /// |
107 | /// assert_eq!(span.start(), 0); |
108 | /// ``` |
109 | #[inline ] |
110 | pub fn start(&self) -> usize { |
111 | self.start |
112 | } |
113 | |
114 | /// Returns the `Span`'s end byte position as a `usize`. |
115 | /// |
116 | /// # Examples |
117 | /// |
118 | /// ``` |
119 | /// # use pest::Position; |
120 | /// let input = "ab" ; |
121 | /// let start = Position::from_start(input); |
122 | /// let end = start.clone(); |
123 | /// let span = start.span(&end); |
124 | /// |
125 | /// assert_eq!(span.end(), 0); |
126 | /// ``` |
127 | #[inline ] |
128 | pub fn end(&self) -> usize { |
129 | self.end |
130 | } |
131 | |
132 | /// Returns the `Span`'s start `Position`. |
133 | /// |
134 | /// # Examples |
135 | /// |
136 | /// ``` |
137 | /// # use pest::Position; |
138 | /// let input = "ab" ; |
139 | /// let start = Position::from_start(input); |
140 | /// let end = start.clone(); |
141 | /// let span = start.clone().span(&end); |
142 | /// |
143 | /// assert_eq!(span.start_pos(), start); |
144 | /// ``` |
145 | #[inline ] |
146 | pub fn start_pos(&self) -> position::Position<'i> { |
147 | // Span's start position is always a UTF-8 border. |
148 | unsafe { position::Position::new_unchecked(self.input, self.start) } |
149 | } |
150 | |
151 | /// Returns the `Span`'s end `Position`. |
152 | /// |
153 | /// # Examples |
154 | /// |
155 | /// ``` |
156 | /// # use pest::Position; |
157 | /// let input = "ab" ; |
158 | /// let start = Position::from_start(input); |
159 | /// let end = start.clone(); |
160 | /// let span = start.span(&end); |
161 | /// |
162 | /// assert_eq!(span.end_pos(), end); |
163 | /// ``` |
164 | #[inline ] |
165 | pub fn end_pos(&self) -> position::Position<'i> { |
166 | // Span's end position is always a UTF-8 border. |
167 | unsafe { position::Position::new_unchecked(self.input, self.end) } |
168 | } |
169 | |
170 | /// Splits the `Span` into a pair of `Position`s. |
171 | /// |
172 | /// # Examples |
173 | /// |
174 | /// ``` |
175 | /// # use pest::Position; |
176 | /// let input = "ab" ; |
177 | /// let start = Position::from_start(input); |
178 | /// let end = start.clone(); |
179 | /// let span = start.clone().span(&end); |
180 | /// |
181 | /// assert_eq!(span.split(), (start, end)); |
182 | /// ``` |
183 | #[inline ] |
184 | pub fn split(self) -> (position::Position<'i>, position::Position<'i>) { |
185 | // Span's start and end positions are always a UTF-8 borders. |
186 | let pos1 = unsafe { position::Position::new_unchecked(self.input, self.start) }; |
187 | let pos2 = unsafe { position::Position::new_unchecked(self.input, self.end) }; |
188 | |
189 | (pos1, pos2) |
190 | } |
191 | |
192 | /// Captures a slice from the `&str` defined by the `Span`. |
193 | /// |
194 | /// # Examples |
195 | /// |
196 | /// ``` |
197 | /// # use pest; |
198 | /// # #[allow (non_camel_case_types)] |
199 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
200 | /// enum Rule {} |
201 | /// |
202 | /// let input = "abc" ; |
203 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(1).unwrap(); |
204 | /// let start_pos = state.position().clone(); |
205 | /// state = state.match_string("b" ).unwrap(); |
206 | /// let span = start_pos.span(&state.position().clone()); |
207 | /// assert_eq!(span.as_str(), "b" ); |
208 | /// ``` |
209 | #[inline ] |
210 | pub fn as_str(&self) -> &'i str { |
211 | // Span's start and end positions are always a UTF-8 borders. |
212 | &self.input[self.start..self.end] |
213 | } |
214 | |
215 | /// Iterates over all lines (partially) covered by this span. Yielding a `&str` for each line. |
216 | /// |
217 | /// # Examples |
218 | /// |
219 | /// ``` |
220 | /// # use pest; |
221 | /// # #[allow (non_camel_case_types)] |
222 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
223 | /// enum Rule {} |
224 | /// |
225 | /// let input = "a \nb \nc" ; |
226 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap(); |
227 | /// let start_pos = state.position().clone(); |
228 | /// state = state.match_string("b \nc" ).unwrap(); |
229 | /// let span = start_pos.span(&state.position().clone()); |
230 | /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b \n" , "c" ]); |
231 | /// ``` |
232 | #[inline ] |
233 | pub fn lines(&self) -> Lines<'_> { |
234 | Lines { |
235 | inner: self.lines_span(), |
236 | } |
237 | } |
238 | |
239 | /// Iterates over all lines (partially) covered by this span. Yielding a `Span` for each line. |
240 | /// |
241 | /// # Examples |
242 | /// |
243 | /// ``` |
244 | /// # use pest; |
245 | /// # use pest::Span; |
246 | /// # #[allow (non_camel_case_types)] |
247 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
248 | /// enum Rule {} |
249 | /// |
250 | /// let input = "a \nb \nc" ; |
251 | /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap(); |
252 | /// let start_pos = state.position().clone(); |
253 | /// state = state.match_string("b \nc" ).unwrap(); |
254 | /// let span = start_pos.span(&state.position().clone()); |
255 | /// assert_eq!(span.lines_span().collect::<Vec<_>>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]); |
256 | /// ``` |
257 | pub fn lines_span(&self) -> LinesSpan<'_> { |
258 | LinesSpan { |
259 | span: self, |
260 | pos: self.start, |
261 | } |
262 | } |
263 | } |
264 | |
265 | impl<'i> fmt::Debug for Span<'i> { |
266 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
267 | f&mut DebugStruct<'_, '_>.debug_struct("Span" ) |
268 | .field("str" , &self.as_str()) |
269 | .field("start" , &self.start) |
270 | .field(name:"end" , &self.end) |
271 | .finish() |
272 | } |
273 | } |
274 | |
275 | impl<'i> PartialEq for Span<'i> { |
276 | fn eq(&self, other: &Span<'i>) -> bool { |
277 | ptr::eq(self.input, b:other.input) && self.start == other.start && self.end == other.end |
278 | } |
279 | } |
280 | |
281 | impl<'i> Eq for Span<'i> {} |
282 | |
283 | impl<'i> Hash for Span<'i> { |
284 | fn hash<H: Hasher>(&self, state: &mut H) { |
285 | (self.input as *const str).hash(state); |
286 | self.start.hash(state); |
287 | self.end.hash(state); |
288 | } |
289 | } |
290 | |
291 | /// Line iterator for Spans, created by [`Span::lines_span()`]. |
292 | /// |
293 | /// Iterates all lines that are at least _partially_ covered by the span. Yielding a `Span` for each. |
294 | /// |
295 | /// [`Span::lines_span()`]: struct.Span.html#method.lines_span |
296 | pub struct LinesSpan<'i> { |
297 | span: &'i Span<'i>, |
298 | pos: usize, |
299 | } |
300 | |
301 | impl<'i> Iterator for LinesSpan<'i> { |
302 | type Item = Span<'i>; |
303 | fn next(&mut self) -> Option<Self::Item> { |
304 | if self.pos > self.span.end { |
305 | return None; |
306 | } |
307 | let pos: Position<'_> = position::Position::new(self.span.input, self.pos)?; |
308 | if pos.at_end() { |
309 | return None; |
310 | } |
311 | |
312 | let line_start: usize = pos.find_line_start(); |
313 | self.pos = pos.find_line_end(); |
314 | |
315 | Span::new(self.span.input, line_start, self.pos) |
316 | } |
317 | } |
318 | |
319 | /// Line iterator for Spans, created by [`Span::lines()`]. |
320 | /// |
321 | /// Iterates all lines that are at least _partially_ covered by the span. Yielding a `&str` for each. |
322 | /// |
323 | /// [`Span::lines()`]: struct.Span.html#method.lines |
324 | pub struct Lines<'i> { |
325 | inner: LinesSpan<'i>, |
326 | } |
327 | |
328 | impl<'i> Iterator for Lines<'i> { |
329 | type Item = &'i str; |
330 | fn next(&mut self) -> Option<Self::Item> { |
331 | self.inner.next().map(|span: Span<'_>| span.as_str()) |
332 | } |
333 | } |
334 | |
/// Unit tests for `Span`: sub-range extraction, comparison, splitting and line iteration.
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::borrow::ToOwned;
    use alloc::vec::Vec;

    /// `get` with various range kinds keeps the original input and yields the right text.
    #[test]
    fn get() {
        let input = "abc123abc";
        let span = Span::new(input, 3, input.len()).unwrap();
        assert_eq!(span.as_str(), "123abc");
        assert_eq!(span.input, input);

        let span1 = span.get(..=2);
        assert!(span1.is_some());
        assert_eq!(span1.unwrap().input, input);
        assert_eq!(span1.unwrap().as_str(), "123");

        let span2 = span.get(..);
        assert!(span2.is_some());
        assert_eq!(span2.unwrap().input, input);
        assert_eq!(span2.unwrap().as_str(), "123abc");

        let span3 = span.get(3..);
        assert!(span3.is_some());
        assert_eq!(span3.unwrap().input, input);
        assert_eq!(span3.unwrap().as_str(), "abc");

        let span4 = span.get(0..0);
        assert!(span4.is_some());
        assert_eq!(span4.unwrap().input, input);
        assert_eq!(span4.unwrap().as_str(), "");
    }

    /// Out-of-bounds ranges are rejected with `None`.
    #[test]
    fn get_fails() {
        let input = "abc";
        let span = Span::new(input, 0, input.len()).unwrap();

        let span1 = span.get(0..100);
        assert!(span1.is_none());

        let span2 = span.get(100..200);
        assert!(span2.is_none());
    }

    /// Spans with different offsets compare unequal; out-of-range spans cannot be built.
    #[test]
    fn span_comp() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 50, 51);
        assert!(span2.is_none());
        let span3 = Span::new(input, 0, 8).unwrap();
        assert!(span != span3);
    }

    /// `split` returns the same positions the span was created from.
    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        let mut end = start;

        assert!(end.skip(1));

        let span = start.clone().span(&end.clone());

        assert_eq!(span.split(), (start, end));
    }

    /// A span starting and ending mid-line yields both touched lines in full.
    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|span| span.as_str()).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "abc\n".to_owned());
        assert_eq!(lines[1], "def\n".to_owned());
        assert_eq!(lines, lines_span) // Verify parity with lines_span()
    }

    /// A span reaching the end of the input yields the final, unterminated line.
    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        assert_eq!(span.end(), 11);
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|span| span.as_str()).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "def\n".to_owned());
        assert_eq!(lines[1], "ghi".to_owned());
        assert_eq!(lines, lines_span) // Verify parity with lines_span()
    }

    /// `lines_span` yields whole-line spans that agree with `lines`.
    #[test]
    fn lines_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines_span: Vec<_> = span.lines_span().collect();
        let lines: Vec<_> = span.lines().collect();

        assert_eq!(lines_span.len(), 2);
        assert_eq!(lines_span[0], Span::new(input, 0, 4).unwrap());
        assert_eq!(lines_span[1], Span::new(input, 4, 8).unwrap());
        assert_eq!(
            lines_span
                .iter()
                .map(|span| span.as_str())
                .collect::<Vec<_>>(),
            lines
        );
    }
}
451 | |