1 | // pest. The Elegant Parser |
2 | // Copyright (c) 2018 Dragoș Tiselice |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. All files in the project carrying such notice may not be copied, |
8 | // modified, or distributed except according to those terms. |
9 | |
10 | use alloc::format; |
11 | use alloc::rc::Rc; |
12 | #[cfg (feature = "pretty-print" )] |
13 | use alloc::string::String; |
14 | use alloc::vec::Vec; |
15 | use core::borrow::Borrow; |
16 | use core::fmt; |
17 | use core::hash::{Hash, Hasher}; |
18 | use core::ptr; |
19 | use core::str; |
20 | |
21 | #[cfg (feature = "pretty-print" )] |
22 | use serde::ser::SerializeStruct; |
23 | |
24 | use super::line_index::LineIndex; |
25 | use super::pairs::{self, Pairs}; |
26 | use super::queueable_token::QueueableToken; |
27 | use super::tokens::{self, Tokens}; |
28 | use crate::span::Span; |
29 | use crate::RuleType; |
30 | |
31 | /// A matching pair of [`Token`]s and everything between them. |
32 | /// |
33 | /// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the |
34 | /// same `Rule`, with the condition that all `Token`s between them can form such pairs as well. |
35 | /// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in |
36 | /// editors. |
37 | /// |
38 | /// [`Token`]: ../enum.Token.html |
39 | #[derive (Clone)] |
40 | pub struct Pair<'i, R> { |
41 | queue: Rc<Vec<QueueableToken<'i, R>>>, |
42 | input: &'i str, |
43 | /// Token index into `queue`. |
44 | start: usize, |
45 | line_index: Rc<LineIndex>, |
46 | } |
47 | |
48 | pub fn new<'i, R: RuleType>( |
49 | queue: Rc<Vec<QueueableToken<'i, R>>>, |
50 | input: &'i str, |
51 | line_index: Rc<LineIndex>, |
52 | start: usize, |
53 | ) -> Pair<'i, R> { |
54 | Pair { |
55 | queue, |
56 | input, |
57 | start, |
58 | line_index, |
59 | } |
60 | } |
61 | |
62 | impl<'i, R: RuleType> Pair<'i, R> { |
63 | /// Returns the `Rule` of the `Pair`. |
64 | /// |
65 | /// # Examples |
66 | /// |
67 | /// ``` |
68 | /// # use std::rc::Rc; |
69 | /// # use pest; |
70 | /// # #[allow (non_camel_case_types)] |
71 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
72 | /// enum Rule { |
73 | /// a |
74 | /// } |
75 | /// |
76 | /// let input = "" ; |
77 | /// let pair = pest::state(input, |state| { |
78 | /// // generating Token pair with Rule::a ... |
79 | /// # state.rule(Rule::a, |s| Ok(s)) |
80 | /// }).unwrap().next().unwrap(); |
81 | /// |
82 | /// assert_eq!(pair.as_rule(), Rule::a); |
83 | /// ``` |
84 | #[inline ] |
85 | pub fn as_rule(&self) -> R { |
86 | match self.queue[self.pair()] { |
87 | QueueableToken::End { rule, .. } => rule, |
88 | _ => unreachable!(), |
89 | } |
90 | } |
91 | |
92 | /// Captures a slice from the `&str` defined by the token `Pair`. |
93 | /// |
94 | /// # Examples |
95 | /// |
96 | /// ``` |
97 | /// # use std::rc::Rc; |
98 | /// # use pest; |
99 | /// # #[allow (non_camel_case_types)] |
100 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
101 | /// enum Rule { |
102 | /// ab |
103 | /// } |
104 | /// |
105 | /// let input = "ab" ; |
106 | /// let pair = pest::state(input, |state| { |
107 | /// // generating Token pair with Rule::ab ... |
108 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
109 | /// }).unwrap().next().unwrap(); |
110 | /// |
111 | /// assert_eq!(pair.as_str(), "ab" ); |
112 | /// ``` |
113 | #[inline ] |
114 | pub fn as_str(&self) -> &'i str { |
115 | let start = self.pos(self.start); |
116 | let end = self.pos(self.pair()); |
117 | |
118 | // Generated positions always come from Positions and are UTF-8 borders. |
119 | &self.input[start..end] |
120 | } |
121 | |
122 | /// Returns the input string of the `Pair`. |
123 | /// |
124 | /// This function returns the input string of the `Pair` as a `&str`. This is the source string |
125 | /// from which the `Pair` was created. The returned `&str` can be used to examine the contents of |
126 | /// the `Pair` or to perform further processing on the string. |
127 | /// |
128 | /// # Examples |
129 | /// |
130 | /// ``` |
131 | /// # use std::rc::Rc; |
132 | /// # use pest; |
133 | /// # #[allow (non_camel_case_types)] |
134 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
135 | /// enum Rule { |
136 | /// ab |
137 | /// } |
138 | /// |
139 | /// // Example: Get input string from a Pair |
140 | /// |
141 | /// let input = "ab" ; |
142 | /// let pair = pest::state(input, |state| { |
143 | /// // generating Token pair with Rule::ab ... |
144 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
145 | /// }).unwrap().next().unwrap(); |
146 | /// |
147 | /// assert_eq!(pair.as_str(), "ab" ); |
148 | /// assert_eq!(input, pair.get_input()); |
149 | /// ``` |
150 | pub fn get_input(&self) -> &'i str { |
151 | self.input |
152 | } |
153 | |
154 | /// Returns the `Span` defined by the `Pair`, consuming it. |
155 | /// |
156 | /// # Examples |
157 | /// |
158 | /// ``` |
159 | /// # use std::rc::Rc; |
160 | /// # use pest; |
161 | /// # #[allow (non_camel_case_types)] |
162 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
163 | /// enum Rule { |
164 | /// ab |
165 | /// } |
166 | /// |
167 | /// let input = "ab" ; |
168 | /// let pair = pest::state(input, |state| { |
169 | /// // generating Token pair with Rule::ab ... |
170 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
171 | /// }).unwrap().next().unwrap(); |
172 | /// |
173 | /// assert_eq!(pair.into_span().as_str(), "ab" ); |
174 | /// ``` |
175 | #[inline ] |
176 | #[deprecated (since = "2.0.0" , note = "Please use `as_span` instead" )] |
177 | pub fn into_span(self) -> Span<'i> { |
178 | self.as_span() |
179 | } |
180 | |
181 | /// Returns the `Span` defined by the `Pair`, **without** consuming it. |
182 | /// |
183 | /// # Examples |
184 | /// |
185 | /// ``` |
186 | /// # use std::rc::Rc; |
187 | /// # use pest; |
188 | /// # #[allow (non_camel_case_types)] |
189 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
190 | /// enum Rule { |
191 | /// ab |
192 | /// } |
193 | /// |
194 | /// let input = "ab" ; |
195 | /// let pair = pest::state(input, |state| { |
196 | /// // generating Token pair with Rule::ab ... |
197 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
198 | /// }).unwrap().next().unwrap(); |
199 | /// |
200 | /// assert_eq!(pair.as_span().as_str(), "ab" ); |
201 | /// ``` |
202 | #[inline ] |
203 | pub fn as_span(&self) -> Span<'i> { |
204 | let start = self.pos(self.start); |
205 | let end = self.pos(self.pair()); |
206 | |
207 | Span::new_internal(self.input, start, end) |
208 | } |
209 | |
210 | /// Get current node tag |
211 | #[inline ] |
212 | pub fn as_node_tag(&self) -> Option<&str> { |
213 | match &self.queue[self.pair()] { |
214 | QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()), |
215 | _ => None, |
216 | } |
217 | } |
218 | |
219 | /// Returns the inner `Pairs` between the `Pair`, consuming it. |
220 | /// |
221 | /// # Examples |
222 | /// |
223 | /// ``` |
224 | /// # use std::rc::Rc; |
225 | /// # use pest; |
226 | /// # #[allow (non_camel_case_types)] |
227 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
228 | /// enum Rule { |
229 | /// a |
230 | /// } |
231 | /// |
232 | /// let input = "" ; |
233 | /// let pair = pest::state(input, |state| { |
234 | /// // generating Token pair with Rule::a ... |
235 | /// # state.rule(Rule::a, |s| Ok(s)) |
236 | /// }).unwrap().next().unwrap(); |
237 | /// |
238 | /// assert!(pair.into_inner().next().is_none()); |
239 | /// ``` |
240 | #[inline ] |
241 | pub fn into_inner(self) -> Pairs<'i, R> { |
242 | let pair = self.pair(); |
243 | |
244 | pairs::new( |
245 | self.queue, |
246 | self.input, |
247 | Some(self.line_index), |
248 | self.start + 1, |
249 | pair, |
250 | ) |
251 | } |
252 | |
253 | /// Returns the `Tokens` for the `Pair`. |
254 | /// |
255 | /// # Examples |
256 | /// |
257 | /// ``` |
258 | /// # use std::rc::Rc; |
259 | /// # use pest; |
260 | /// # #[allow (non_camel_case_types)] |
261 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
262 | /// enum Rule { |
263 | /// a |
264 | /// } |
265 | /// |
266 | /// let input = "" ; |
267 | /// let pair = pest::state(input, |state| { |
268 | /// // generating Token pair with Rule::a ... |
269 | /// # state.rule(Rule::a, |s| Ok(s)) |
270 | /// }).unwrap().next().unwrap(); |
271 | /// let tokens: Vec<_> = pair.tokens().collect(); |
272 | /// |
273 | /// assert_eq!(tokens.len(), 2); |
274 | /// ``` |
275 | #[inline ] |
276 | pub fn tokens(self) -> Tokens<'i, R> { |
277 | let end = self.pair(); |
278 | |
279 | tokens::new(self.queue, self.input, self.start, end + 1) |
280 | } |
281 | |
282 | /// Generates a string that stores the lexical information of `self` in |
283 | /// a pretty-printed JSON format. |
284 | #[cfg (feature = "pretty-print" )] |
285 | pub fn to_json(&self) -> String { |
286 | ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json." ) |
287 | } |
288 | |
289 | /// Returns the `line`, `col` of this pair start. |
290 | pub fn line_col(&self) -> (usize, usize) { |
291 | let pos = self.pos(self.start); |
292 | self.line_index.line_col(self.input, pos) |
293 | } |
294 | |
295 | fn pair(&self) -> usize { |
296 | match self.queue[self.start] { |
297 | QueueableToken::Start { |
298 | end_token_index, .. |
299 | } => end_token_index, |
300 | _ => unreachable!(), |
301 | } |
302 | } |
303 | |
304 | fn pos(&self, index: usize) -> usize { |
305 | match self.queue[index] { |
306 | QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => { |
307 | input_pos |
308 | } |
309 | } |
310 | } |
311 | } |
312 | |
313 | impl<'i, R: RuleType> Pairs<'i, R> { |
314 | /// Create a new `Pairs` iterator containing just the single `Pair`. |
315 | pub fn single(pair: Pair<'i, R>) -> Self { |
316 | let end: usize = pair.pair(); |
317 | pairs::new( |
318 | pair.queue, |
319 | pair.input, |
320 | line_index:Some(pair.line_index), |
321 | pair.start, |
322 | end, |
323 | ) |
324 | } |
325 | } |
326 | |
327 | impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> { |
328 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
329 | let pair: &mut DebugStruct<'_, '_> = &mut f.debug_struct(name:"Pair" ); |
330 | pair.field(name:"rule" , &self.as_rule()); |
331 | // In order not to break compatibility |
332 | if let Some(s: &str) = self.as_node_tag() { |
333 | pair.field(name:"node_tag" , &s); |
334 | } |
335 | pair&mut DebugStruct<'_, '_>.field("span" , &self.as_span()) |
336 | .field(name:"inner" , &self.clone().into_inner().collect::<Vec<_>>()) |
337 | .finish() |
338 | } |
339 | } |
340 | |
341 | impl<'i, R: RuleType> fmt::Display for Pair<'i, R> { |
342 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
343 | let rule: R = self.as_rule(); |
344 | let start: usize = self.pos(self.start); |
345 | let end: usize = self.pos(self.pair()); |
346 | let mut pairs: impl Iterator- >
= self.clone().into_inner().peekable(); |
347 | |
348 | if pairs.peek().is_none() { |
349 | write!(f, " {:?}( {}, {})" , rule, start, end) |
350 | } else { |
351 | write!( |
352 | f, |
353 | " {:?}( {}, {}, [ {}])" , |
354 | rule, |
355 | start, |
356 | end, |
357 | pairs |
358 | .map(|pair| format!(" {}" , pair)) |
359 | .collect::<Vec<_>>() |
360 | .join(", " ) |
361 | ) |
362 | } |
363 | } |
364 | } |
365 | |
366 | impl<'i, R: PartialEq> PartialEq for Pair<'i, R> { |
367 | fn eq(&self, other: &Pair<'i, R>) -> bool { |
368 | Rc::ptr_eq(&self.queue, &other.queue) |
369 | && ptr::eq(self.input, b:other.input) |
370 | && self.start == other.start |
371 | } |
372 | } |
373 | |
374 | impl<'i, R: Eq> Eq for Pair<'i, R> {} |
375 | |
376 | impl<'i, R: Hash> Hash for Pair<'i, R> { |
377 | fn hash<H: Hasher>(&self, state: &mut H) { |
378 | (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state); |
379 | (self.input as *const str).hash(state); |
380 | self.start.hash(state); |
381 | } |
382 | } |
383 | |
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    /// Serializes the pair as a struct with three fields, in this order:
    /// `pos` (the start and end input positions), `rule` (the `Debug` rendering of the rule)
    /// and `inner` (the matched string when the pair has no children, the nested pairs
    /// otherwise).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let span_bounds = (self.pos(self.start), self.pos(self.pair()));
        let rule_name = format!("{:?}", self.as_rule());
        let children = self.clone().into_inner();

        let mut state = serializer.serialize_struct("Pairs", 3)?;
        state.serialize_field("pos", &span_bounds)?;
        state.serialize_field("rule", &rule_name)?;

        if children.peek().is_some() {
            state.serialize_field("inner", &children)?;
        } else {
            // A leaf pair is represented by the text it matched.
            state.serialize_field("inner", &self.as_str())?;
        }

        state.end()
    }
}
408 | |
#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    /// `to_json` must produce pretty-printed JSON; the expected literal therefore has to
    /// carry the exact two-space nesting indentation of the pretty printer.
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    /// The inner pairs of `a` contain exactly the start and end tokens of `b`.
    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }

    /// `get_input` returns the full input string, not just the matched part.
    #[test]
    fn get_input_of_pair() {
        let input = "abcde";
        let pair = AbcParser::parse(Rule::a, input).unwrap().next().unwrap();

        assert_eq!(input, pair.get_input());
    }
}
463 | |