1 | // pest. The Elegant Parser |
2 | // Copyright (c) 2018 Dragoș Tiselice |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. All files in the project carrying such notice may not be copied, |
8 | // modified, or distributed except according to those terms. |
9 | |
10 | use alloc::format; |
11 | use alloc::rc::Rc; |
12 | #[cfg (feature = "pretty-print" )] |
13 | use alloc::string::String; |
14 | use alloc::vec::Vec; |
15 | use core::borrow::Borrow; |
16 | use core::fmt; |
17 | use core::hash::{Hash, Hasher}; |
18 | use core::ptr; |
19 | use core::str; |
20 | |
21 | #[cfg (feature = "pretty-print" )] |
22 | use serde::ser::SerializeStruct; |
23 | |
24 | use super::line_index::LineIndex; |
25 | use super::pairs::{self, Pairs}; |
26 | use super::queueable_token::QueueableToken; |
27 | use super::tokens::{self, Tokens}; |
28 | use crate::span::{self, Span}; |
29 | use crate::RuleType; |
30 | |
/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// A `Pair` does not own a copy of its tokens: it shares the token queue through an `Rc` and
/// only remembers the index of its own start token, so cloning it does not copy the queue.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// Shared token queue that this pair indexes into.
    ///
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    /// The input string that the tokens' `input_pos` values index into.
    input: &'i str,
    /// Token index into `queue`.
    ///
    /// The token at this index is expected to be a `QueueableToken::Start`; the private
    /// `pair()` helper treats anything else as unreachable.
    start: usize,
    /// Shared line index used by `line_col` to turn a byte position into a line/column pair.
    line_index: Rc<LineIndex>,
}
50 | |
/// Builds a [`Pair`] that refers to the token at index `start` of `queue`.
///
/// No validation is performed here; the arguments are stored as-is. `start` is expected to be
/// the index of a `QueueableToken::Start` in `queue`, since the accessors on `Pair` treat any
/// other token kind at that index as unreachable.
///
/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<'i, R: RuleType>(
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    line_index: Rc<LineIndex>,
    start: usize,
) -> Pair<'i, R> {
    Pair {
        queue,
        input,
        start,
        line_index,
    }
}
67 | |
68 | impl<'i, R: RuleType> Pair<'i, R> { |
69 | /// Returns the `Rule` of the `Pair`. |
70 | /// |
71 | /// # Examples |
72 | /// |
73 | /// ``` |
74 | /// # use std::rc::Rc; |
75 | /// # use pest; |
76 | /// # #[allow (non_camel_case_types)] |
77 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
78 | /// enum Rule { |
79 | /// a |
80 | /// } |
81 | /// |
82 | /// let input = "" ; |
83 | /// let pair = pest::state(input, |state| { |
84 | /// // generating Token pair with Rule::a ... |
85 | /// # state.rule(Rule::a, |s| Ok(s)) |
86 | /// }).unwrap().next().unwrap(); |
87 | /// |
88 | /// assert_eq!(pair.as_rule(), Rule::a); |
89 | /// ``` |
90 | #[inline ] |
91 | pub fn as_rule(&self) -> R { |
92 | match self.queue[self.pair()] { |
93 | QueueableToken::End { rule, .. } => rule, |
94 | _ => unreachable!(), |
95 | } |
96 | } |
97 | |
98 | /// Captures a slice from the `&str` defined by the token `Pair`. |
99 | /// |
100 | /// # Examples |
101 | /// |
102 | /// ``` |
103 | /// # use std::rc::Rc; |
104 | /// # use pest; |
105 | /// # #[allow (non_camel_case_types)] |
106 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
107 | /// enum Rule { |
108 | /// ab |
109 | /// } |
110 | /// |
111 | /// let input = "ab" ; |
112 | /// let pair = pest::state(input, |state| { |
113 | /// // generating Token pair with Rule::ab ... |
114 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
115 | /// }).unwrap().next().unwrap(); |
116 | /// |
117 | /// assert_eq!(pair.as_str(), "ab" ); |
118 | /// ``` |
119 | #[inline ] |
120 | pub fn as_str(&self) -> &'i str { |
121 | let start = self.pos(self.start); |
122 | let end = self.pos(self.pair()); |
123 | |
124 | // Generated positions always come from Positions and are UTF-8 borders. |
125 | &self.input[start..end] |
126 | } |
127 | |
128 | /// Returns the `Span` defined by the `Pair`, consuming it. |
129 | /// |
130 | /// # Examples |
131 | /// |
132 | /// ``` |
133 | /// # use std::rc::Rc; |
134 | /// # use pest; |
135 | /// # #[allow (non_camel_case_types)] |
136 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
137 | /// enum Rule { |
138 | /// ab |
139 | /// } |
140 | /// |
141 | /// let input = "ab" ; |
142 | /// let pair = pest::state(input, |state| { |
143 | /// // generating Token pair with Rule::ab ... |
144 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
145 | /// }).unwrap().next().unwrap(); |
146 | /// |
147 | /// assert_eq!(pair.into_span().as_str(), "ab" ); |
148 | /// ``` |
149 | #[inline ] |
150 | #[deprecated (since = "2.0.0" , note = "Please use `as_span` instead" )] |
151 | pub fn into_span(self) -> Span<'i> { |
152 | self.as_span() |
153 | } |
154 | |
155 | /// Returns the `Span` defined by the `Pair`, **without** consuming it. |
156 | /// |
157 | /// # Examples |
158 | /// |
159 | /// ``` |
160 | /// # use std::rc::Rc; |
161 | /// # use pest; |
162 | /// # #[allow (non_camel_case_types)] |
163 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
164 | /// enum Rule { |
165 | /// ab |
166 | /// } |
167 | /// |
168 | /// let input = "ab" ; |
169 | /// let pair = pest::state(input, |state| { |
170 | /// // generating Token pair with Rule::ab ... |
171 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
172 | /// }).unwrap().next().unwrap(); |
173 | /// |
174 | /// assert_eq!(pair.as_span().as_str(), "ab" ); |
175 | /// ``` |
176 | #[inline ] |
177 | pub fn as_span(&self) -> Span<'i> { |
178 | let start = self.pos(self.start); |
179 | let end = self.pos(self.pair()); |
180 | |
181 | // Generated positions always come from Positions and are UTF-8 borders. |
182 | unsafe { span::Span::new_unchecked(self.input, start, end) } |
183 | } |
184 | |
185 | /// Get current node tag |
186 | #[inline ] |
187 | pub fn as_node_tag(&self) -> Option<&str> { |
188 | match &self.queue[self.pair()] { |
189 | QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()), |
190 | _ => None, |
191 | } |
192 | } |
193 | |
194 | /// Returns the inner `Pairs` between the `Pair`, consuming it. |
195 | /// |
196 | /// # Examples |
197 | /// |
198 | /// ``` |
199 | /// # use std::rc::Rc; |
200 | /// # use pest; |
201 | /// # #[allow (non_camel_case_types)] |
202 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
203 | /// enum Rule { |
204 | /// a |
205 | /// } |
206 | /// |
207 | /// let input = "" ; |
208 | /// let pair = pest::state(input, |state| { |
209 | /// // generating Token pair with Rule::a ... |
210 | /// # state.rule(Rule::a, |s| Ok(s)) |
211 | /// }).unwrap().next().unwrap(); |
212 | /// |
213 | /// assert!(pair.into_inner().next().is_none()); |
214 | /// ``` |
215 | #[inline ] |
216 | pub fn into_inner(self) -> Pairs<'i, R> { |
217 | let pair = self.pair(); |
218 | |
219 | pairs::new( |
220 | self.queue, |
221 | self.input, |
222 | Some(self.line_index), |
223 | self.start + 1, |
224 | pair, |
225 | ) |
226 | } |
227 | |
228 | /// Returns the `Tokens` for the `Pair`. |
229 | /// |
230 | /// # Examples |
231 | /// |
232 | /// ``` |
233 | /// # use std::rc::Rc; |
234 | /// # use pest; |
235 | /// # #[allow (non_camel_case_types)] |
236 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
237 | /// enum Rule { |
238 | /// a |
239 | /// } |
240 | /// |
241 | /// let input = "" ; |
242 | /// let pair = pest::state(input, |state| { |
243 | /// // generating Token pair with Rule::a ... |
244 | /// # state.rule(Rule::a, |s| Ok(s)) |
245 | /// }).unwrap().next().unwrap(); |
246 | /// let tokens: Vec<_> = pair.tokens().collect(); |
247 | /// |
248 | /// assert_eq!(tokens.len(), 2); |
249 | /// ``` |
250 | #[inline ] |
251 | pub fn tokens(self) -> Tokens<'i, R> { |
252 | let end = self.pair(); |
253 | |
254 | tokens::new(self.queue, self.input, self.start, end + 1) |
255 | } |
256 | |
257 | /// Generates a string that stores the lexical information of `self` in |
258 | /// a pretty-printed JSON format. |
259 | #[cfg (feature = "pretty-print" )] |
260 | pub fn to_json(&self) -> String { |
261 | ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json." ) |
262 | } |
263 | |
264 | /// Returns the `line`, `col` of this pair start. |
265 | pub fn line_col(&self) -> (usize, usize) { |
266 | let pos = self.pos(self.start); |
267 | self.line_index.line_col(self.input, pos) |
268 | } |
269 | |
270 | fn pair(&self) -> usize { |
271 | match self.queue[self.start] { |
272 | QueueableToken::Start { |
273 | end_token_index, .. |
274 | } => end_token_index, |
275 | _ => unreachable!(), |
276 | } |
277 | } |
278 | |
279 | fn pos(&self, index: usize) -> usize { |
280 | match self.queue[index] { |
281 | QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => { |
282 | input_pos |
283 | } |
284 | } |
285 | } |
286 | } |
287 | |
288 | impl<'i, R: RuleType> Pairs<'i, R> { |
289 | /// Create a new `Pairs` iterator containing just the single `Pair`. |
290 | pub fn single(pair: Pair<'i, R>) -> Self { |
291 | let end: usize = pair.pair(); |
292 | pairs::new( |
293 | pair.queue, |
294 | pair.input, |
295 | line_index:Some(pair.line_index), |
296 | pair.start, |
297 | end, |
298 | ) |
299 | } |
300 | } |
301 | |
302 | impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> { |
303 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
304 | let pair: &mut DebugStruct<'_, '_> = &mut f.debug_struct(name:"Pair" ); |
305 | pair.field(name:"rule" , &self.as_rule()); |
306 | // In order not to break compatibility |
307 | if let Some(s: &str) = self.as_node_tag() { |
308 | pair.field(name:"node_tag" , &s); |
309 | } |
310 | pair&mut DebugStruct<'_, '_>.field("span" , &self.as_span()) |
311 | .field(name:"inner" , &self.clone().into_inner().collect::<Vec<_>>()) |
312 | .finish() |
313 | } |
314 | } |
315 | |
316 | impl<'i, R: RuleType> fmt::Display for Pair<'i, R> { |
317 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
318 | let rule: R = self.as_rule(); |
319 | let start: usize = self.pos(self.start); |
320 | let end: usize = self.pos(self.pair()); |
321 | let mut pairs: impl Iterator- >
= self.clone().into_inner().peekable(); |
322 | |
323 | if pairs.peek().is_none() { |
324 | write!(f, " {:?}( {}, {})" , rule, start, end) |
325 | } else { |
326 | write!( |
327 | f, |
328 | " {:?}( {}, {}, [ {}])" , |
329 | rule, |
330 | start, |
331 | end, |
332 | pairs |
333 | .map(|pair| format!(" {}" , pair)) |
334 | .collect::<Vec<_>>() |
335 | .join(", " ) |
336 | ) |
337 | } |
338 | } |
339 | } |
340 | |
341 | impl<'i, R: PartialEq> PartialEq for Pair<'i, R> { |
342 | fn eq(&self, other: &Pair<'i, R>) -> bool { |
343 | Rc::ptr_eq(&self.queue, &other.queue) |
344 | && ptr::eq(self.input, b:other.input) |
345 | && self.start == other.start |
346 | } |
347 | } |
348 | |
349 | impl<'i, R: Eq> Eq for Pair<'i, R> {} |
350 | |
351 | impl<'i, R: Hash> Hash for Pair<'i, R> { |
352 | fn hash<H: Hasher>(&self, state: &mut H) { |
353 | (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state); |
354 | (self.input as *const str).hash(state); |
355 | self.start.hash(state); |
356 | } |
357 | } |
358 | |
359 | #[cfg (feature = "pretty-print" )] |
360 | impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> { |
361 | fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> |
362 | where |
363 | S: ::serde::Serializer, |
364 | { |
365 | let start = self.pos(self.start); |
366 | let end = self.pos(self.pair()); |
367 | let rule = format!(" {:?}" , self.as_rule()); |
368 | let inner = self.clone().into_inner(); |
369 | |
370 | let mut ser = serializer.serialize_struct("Pairs" , 3)?; |
371 | ser.serialize_field("pos" , &(start, end))?; |
372 | ser.serialize_field("rule" , &rule)?; |
373 | |
374 | if inner.peek().is_none() { |
375 | ser.serialize_field("inner" , &self.as_str())?; |
376 | } else { |
377 | ser.serialize_field("inner" , &inner)?; |
378 | } |
379 | |
380 | ser.end() |
381 | } |
382 | } |
383 | |
384 | #[cfg (test)] |
385 | mod tests { |
386 | use crate::macros::tests::*; |
387 | use crate::parser::Parser; |
388 | |
389 | #[test ] |
390 | #[cfg (feature = "pretty-print" )] |
391 | fn test_pretty_print() { |
392 | let pair = AbcParser::parse(Rule::a, "abcde" ).unwrap().next().unwrap(); |
393 | |
394 | let expected = r#"{ |
395 | "pos": [ |
396 | 0, |
397 | 3 |
398 | ], |
399 | "rule": "a", |
400 | "inner": { |
401 | "pos": [ |
402 | 1, |
403 | 2 |
404 | ], |
405 | "pairs": [ |
406 | { |
407 | "pos": [ |
408 | 1, |
409 | 2 |
410 | ], |
411 | "rule": "b", |
412 | "inner": "b" |
413 | } |
414 | ] |
415 | } |
416 | }"# ; |
417 | |
418 | assert_eq!(expected, pair.to_json()); |
419 | } |
420 | |
421 | #[test ] |
422 | fn pair_into_inner() { |
423 | let pair = AbcParser::parse(Rule::a, "abcde" ).unwrap().next().unwrap(); // the tokens a(b()) |
424 | |
425 | let pairs = pair.into_inner(); // the tokens b() |
426 | |
427 | assert_eq!(2, pairs.tokens().count()); |
428 | } |
429 | } |
430 | |