// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
9
use alloc::format;
use alloc::rc::Rc;
#[cfg(feature = "pretty-print")]
use alloc::string::String;
use alloc::vec::Vec;
use core::borrow::Borrow;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::ptr;
use core::str;

#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::line_index::LineIndex;
use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::span::{self, Span};
use crate::RuleType;
30
31/// A matching pair of [`Token`]s and everything between them.
32///
33/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
34/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
35/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
36/// editors.
37///
38/// [`Token`]: ../enum.Token.html
39#[derive(Clone)]
40pub struct Pair<'i, R> {
41 /// # Safety
42 ///
43 /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
44 queue: Rc<Vec<QueueableToken<'i, R>>>,
45 input: &'i str,
46 /// Token index into `queue`.
47 start: usize,
48 line_index: Rc<LineIndex>,
49}
50
51/// # Safety
52///
53/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
54pub unsafe fn new<'i, R: RuleType>(
55 queue: Rc<Vec<QueueableToken<'i, R>>>,
56 input: &'i str,
57 line_index: Rc<LineIndex>,
58 start: usize,
59) -> Pair<'i, R> {
60 Pair {
61 queue,
62 input,
63 start,
64 line_index,
65 }
66}
67
68impl<'i, R: RuleType> Pair<'i, R> {
69 /// Returns the `Rule` of the `Pair`.
70 ///
71 /// # Examples
72 ///
73 /// ```
74 /// # use std::rc::Rc;
75 /// # use pest;
76 /// # #[allow(non_camel_case_types)]
77 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
78 /// enum Rule {
79 /// a
80 /// }
81 ///
82 /// let input = "";
83 /// let pair = pest::state(input, |state| {
84 /// // generating Token pair with Rule::a ...
85 /// # state.rule(Rule::a, |s| Ok(s))
86 /// }).unwrap().next().unwrap();
87 ///
88 /// assert_eq!(pair.as_rule(), Rule::a);
89 /// ```
90 #[inline]
91 pub fn as_rule(&self) -> R {
92 match self.queue[self.pair()] {
93 QueueableToken::End { rule, .. } => rule,
94 _ => unreachable!(),
95 }
96 }
97
98 /// Captures a slice from the `&str` defined by the token `Pair`.
99 ///
100 /// # Examples
101 ///
102 /// ```
103 /// # use std::rc::Rc;
104 /// # use pest;
105 /// # #[allow(non_camel_case_types)]
106 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
107 /// enum Rule {
108 /// ab
109 /// }
110 ///
111 /// let input = "ab";
112 /// let pair = pest::state(input, |state| {
113 /// // generating Token pair with Rule::ab ...
114 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
115 /// }).unwrap().next().unwrap();
116 ///
117 /// assert_eq!(pair.as_str(), "ab");
118 /// ```
119 #[inline]
120 pub fn as_str(&self) -> &'i str {
121 let start = self.pos(self.start);
122 let end = self.pos(self.pair());
123
124 // Generated positions always come from Positions and are UTF-8 borders.
125 &self.input[start..end]
126 }
127
128 /// Returns the `Span` defined by the `Pair`, consuming it.
129 ///
130 /// # Examples
131 ///
132 /// ```
133 /// # use std::rc::Rc;
134 /// # use pest;
135 /// # #[allow(non_camel_case_types)]
136 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
137 /// enum Rule {
138 /// ab
139 /// }
140 ///
141 /// let input = "ab";
142 /// let pair = pest::state(input, |state| {
143 /// // generating Token pair with Rule::ab ...
144 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
145 /// }).unwrap().next().unwrap();
146 ///
147 /// assert_eq!(pair.into_span().as_str(), "ab");
148 /// ```
149 #[inline]
150 #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
151 pub fn into_span(self) -> Span<'i> {
152 self.as_span()
153 }
154
155 /// Returns the `Span` defined by the `Pair`, **without** consuming it.
156 ///
157 /// # Examples
158 ///
159 /// ```
160 /// # use std::rc::Rc;
161 /// # use pest;
162 /// # #[allow(non_camel_case_types)]
163 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
164 /// enum Rule {
165 /// ab
166 /// }
167 ///
168 /// let input = "ab";
169 /// let pair = pest::state(input, |state| {
170 /// // generating Token pair with Rule::ab ...
171 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
172 /// }).unwrap().next().unwrap();
173 ///
174 /// assert_eq!(pair.as_span().as_str(), "ab");
175 /// ```
176 #[inline]
177 pub fn as_span(&self) -> Span<'i> {
178 let start = self.pos(self.start);
179 let end = self.pos(self.pair());
180
181 // Generated positions always come from Positions and are UTF-8 borders.
182 unsafe { span::Span::new_unchecked(self.input, start, end) }
183 }
184
185 /// Get current node tag
186 #[inline]
187 pub fn as_node_tag(&self) -> Option<&str> {
188 match &self.queue[self.pair()] {
189 QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
190 _ => None,
191 }
192 }
193
194 /// Returns the inner `Pairs` between the `Pair`, consuming it.
195 ///
196 /// # Examples
197 ///
198 /// ```
199 /// # use std::rc::Rc;
200 /// # use pest;
201 /// # #[allow(non_camel_case_types)]
202 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
203 /// enum Rule {
204 /// a
205 /// }
206 ///
207 /// let input = "";
208 /// let pair = pest::state(input, |state| {
209 /// // generating Token pair with Rule::a ...
210 /// # state.rule(Rule::a, |s| Ok(s))
211 /// }).unwrap().next().unwrap();
212 ///
213 /// assert!(pair.into_inner().next().is_none());
214 /// ```
215 #[inline]
216 pub fn into_inner(self) -> Pairs<'i, R> {
217 let pair = self.pair();
218
219 pairs::new(
220 self.queue,
221 self.input,
222 Some(self.line_index),
223 self.start + 1,
224 pair,
225 )
226 }
227
228 /// Returns the `Tokens` for the `Pair`.
229 ///
230 /// # Examples
231 ///
232 /// ```
233 /// # use std::rc::Rc;
234 /// # use pest;
235 /// # #[allow(non_camel_case_types)]
236 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
237 /// enum Rule {
238 /// a
239 /// }
240 ///
241 /// let input = "";
242 /// let pair = pest::state(input, |state| {
243 /// // generating Token pair with Rule::a ...
244 /// # state.rule(Rule::a, |s| Ok(s))
245 /// }).unwrap().next().unwrap();
246 /// let tokens: Vec<_> = pair.tokens().collect();
247 ///
248 /// assert_eq!(tokens.len(), 2);
249 /// ```
250 #[inline]
251 pub fn tokens(self) -> Tokens<'i, R> {
252 let end = self.pair();
253
254 tokens::new(self.queue, self.input, self.start, end + 1)
255 }
256
257 /// Generates a string that stores the lexical information of `self` in
258 /// a pretty-printed JSON format.
259 #[cfg(feature = "pretty-print")]
260 pub fn to_json(&self) -> String {
261 ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
262 }
263
264 /// Returns the `line`, `col` of this pair start.
265 pub fn line_col(&self) -> (usize, usize) {
266 let pos = self.pos(self.start);
267 self.line_index.line_col(self.input, pos)
268 }
269
270 fn pair(&self) -> usize {
271 match self.queue[self.start] {
272 QueueableToken::Start {
273 end_token_index, ..
274 } => end_token_index,
275 _ => unreachable!(),
276 }
277 }
278
279 fn pos(&self, index: usize) -> usize {
280 match self.queue[index] {
281 QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
282 input_pos
283 }
284 }
285 }
286}
287
288impl<'i, R: RuleType> Pairs<'i, R> {
289 /// Create a new `Pairs` iterator containing just the single `Pair`.
290 pub fn single(pair: Pair<'i, R>) -> Self {
291 let end: usize = pair.pair();
292 pairs::new(
293 pair.queue,
294 pair.input,
295 line_index:Some(pair.line_index),
296 pair.start,
297 end,
298 )
299 }
300}
301
302impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
303 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
304 let pair: &mut DebugStruct<'_, '_> = &mut f.debug_struct(name:"Pair");
305 pair.field(name:"rule", &self.as_rule());
306 // In order not to break compatibility
307 if let Some(s: &str) = self.as_node_tag() {
308 pair.field(name:"node_tag", &s);
309 }
310 pair&mut DebugStruct<'_, '_>.field("span", &self.as_span())
311 .field(name:"inner", &self.clone().into_inner().collect::<Vec<_>>())
312 .finish()
313 }
314}
315
316impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
317 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
318 let rule: R = self.as_rule();
319 let start: usize = self.pos(self.start);
320 let end: usize = self.pos(self.pair());
321 let mut pairs: impl Iterator> = self.clone().into_inner().peekable();
322
323 if pairs.peek().is_none() {
324 write!(f, "{:?}({}, {})", rule, start, end)
325 } else {
326 write!(
327 f,
328 "{:?}({}, {}, [{}])",
329 rule,
330 start,
331 end,
332 pairs
333 .map(|pair| format!("{}", pair))
334 .collect::<Vec<_>>()
335 .join(", ")
336 )
337 }
338 }
339}
340
341impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
342 fn eq(&self, other: &Pair<'i, R>) -> bool {
343 Rc::ptr_eq(&self.queue, &other.queue)
344 && ptr::eq(self.input, b:other.input)
345 && self.start == other.start
346 }
347}
348
349impl<'i, R: Eq> Eq for Pair<'i, R> {}
350
351impl<'i, R: Hash> Hash for Pair<'i, R> {
352 fn hash<H: Hasher>(&self, state: &mut H) {
353 (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
354 (self.input as *const str).hash(state);
355 self.start.hash(state);
356 }
357}
358
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let rule = format!("{:?}", self.as_rule());
        let inner = self.clone().into_inner();

        let mut ser = serializer.serialize_struct("Pairs", 3)?;
        ser.serialize_field("pos", &(start, end))?;
        ser.serialize_field("rule", &rule)?;

        // Leaf pairs serialize their matched text; non-leaf pairs serialize
        // their children recursively.
        if inner.peek().is_none() {
            ser.serialize_field("inner", &self.as_str())?;
        } else {
            ser.serialize_field("inner", &inner)?;
        }

        ser.end()
    }
}
383
#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        // serde_json::to_string_pretty uses 2-space indentation.
        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }
}
430