pair.rs source code [crates/pest/src/iterators/pair.rs]

1	// pest. The Elegant Parser
2	// Copyright (c) 2018 Dragoș Tiselice
3	//
4	// Licensed under the Apache License, Version 2.0
5	// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6	// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7	// option. All files in the project carrying such notice may not be copied,
8	// modified, or distributed except according to those terms.
9
10	use alloc::format;
11	use alloc::rc::Rc;
12	#[cfg(feature = "pretty-print")]
13	use alloc::string::String;
14	use alloc::vec::Vec;
15	use core::borrow::Borrow;
16	use core::fmt;
17	use core::hash::{Hash, Hasher};
18	use core::ptr;
19	use core::str;
20
21	#[cfg(feature = "pretty-print")]
22	use serde::ser::SerializeStruct;
23
24	use super::line_index::LineIndex;
25	use super::pairs::{self, Pairs};
26	use super::queueable_token::QueueableToken;
27	use super::tokens::{self, Tokens};
28	use crate::span::Span;
29	use crate::RuleType;
30
31	/// A matching pair of [`Token`]s and everything between them.
32	///
33	/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
34	/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
35	/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
36	/// editors.
37	///
38	/// [`Token`]: ../enum.Token.html
39	#[derive(Clone)]
40	pub struct Pair<'i, R> {
41	queue: Rc<Vec<QueueableToken<'i, R>>>,
42	input: &'i str,
43	/// Token index into `queue`.
44	start: usize,
45	line_index: Rc<LineIndex>,
46	}
47
48	pub fn new<'i, R: RuleType>(
49	queue: Rc<Vec<QueueableToken<'i, R>>>,
50	input: &'i str,
51	line_index: Rc<LineIndex>,
52	start: usize,
53	) -> Pair<'i, R> {
54	Pair {
55	queue,
56	input,
57	start,
58	line_index,
59	}
60	}
61
62	impl<'i, R: RuleType> Pair<'i, R> {
63	/// Returns the `Rule` of the `Pair`.
64	///
65	/// # Examples
66	///
67	/// ```
68	/// # use std::rc::Rc;
69	/// # use pest;
70	/// # #[allow(non_camel_case_types)]
71	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
72	/// enum Rule {
73	/// a
74	/// }
75	///
76	/// let input = "";
77	/// let pair = pest::state(input, \|state\| {
78	/// // generating Token pair with Rule::a ...
79	/// # state.rule(Rule::a, \|s\| Ok(s))
80	/// }).unwrap().next().unwrap();
81	///
82	/// assert_eq!(pair.as_rule(), Rule::a);
83	/// ```
84	#[inline]
85	pub fn as_rule(&self) -> R {
86	match self.queue[self.pair()] {
87	QueueableToken::End { rule, .. } => rule,
88	_ => unreachable!(),
89	}
90	}
91
92	/// Captures a slice from the `&str` defined by the token `Pair`.
93	///
94	/// # Examples
95	///
96	/// ```
97	/// # use std::rc::Rc;
98	/// # use pest;
99	/// # #[allow(non_camel_case_types)]
100	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
101	/// enum Rule {
102	/// ab
103	/// }
104	///
105	/// let input = "ab";
106	/// let pair = pest::state(input, \|state\| {
107	/// // generating Token pair with Rule::ab ...
108	/// # state.rule(Rule::ab, \|s\| s.match_string("ab"))
109	/// }).unwrap().next().unwrap();
110	///
111	/// assert_eq!(pair.as_str(), "ab");
112	/// ```
113	#[inline]
114	pub fn as_str(&self) -> &'i str {
115	let start = self.pos(self.start);
116	let end = self.pos(self.pair());
117
118	// Generated positions always come from Positions and are UTF-8 borders.
119	&self.input[start..end]
120	}
121
122	/// Returns the input string of the `Pair`.
123	///
124	/// This function returns the input string of the `Pair` as a `&str`. This is the source string
125	/// from which the `Pair` was created. The returned `&str` can be used to examine the contents of
126	/// the `Pair` or to perform further processing on the string.
127	///
128	/// # Examples
129	///
130	/// ```
131	/// # use std::rc::Rc;
132	/// # use pest;
133	/// # #[allow(non_camel_case_types)]
134	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
135	/// enum Rule {
136	/// ab
137	/// }
138	///
139	/// // Example: Get input string from a Pair
140	///
141	/// let input = "ab";
142	/// let pair = pest::state(input, \|state\| {
143	/// // generating Token pair with Rule::ab ...
144	/// # state.rule(Rule::ab, \|s\| s.match_string("ab"))
145	/// }).unwrap().next().unwrap();
146	///
147	/// assert_eq!(pair.as_str(), "ab");
148	/// assert_eq!(input, pair.get_input());
149	/// ```
150	pub fn get_input(&self) -> &'i str {
151	self.input
152	}
153
154	/// Returns the `Span` defined by the `Pair`, consuming it.
155	///
156	/// # Examples
157	///
158	/// ```
159	/// # use std::rc::Rc;
160	/// # use pest;
161	/// # #[allow(non_camel_case_types)]
162	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
163	/// enum Rule {
164	/// ab
165	/// }
166	///
167	/// let input = "ab";
168	/// let pair = pest::state(input, \|state\| {
169	/// // generating Token pair with Rule::ab ...
170	/// # state.rule(Rule::ab, \|s\| s.match_string("ab"))
171	/// }).unwrap().next().unwrap();
172	///
173	/// assert_eq!(pair.into_span().as_str(), "ab");
174	/// ```
175	#[inline]
176	#[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
177	pub fn into_span(self) -> Span<'i> {
178	self.as_span()
179	}
180
181	/// Returns the `Span` defined by the `Pair`, without* consuming it.*
182	///
183	/// # Examples
184	///
185	/// ```
186	/// # use std::rc::Rc;
187	/// # use pest;
188	/// # #[allow(non_camel_case_types)]
189	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
190	/// enum Rule {
191	/// ab
192	/// }
193	///
194	/// let input = "ab";
195	/// let pair = pest::state(input, \|state\| {
196	/// // generating Token pair with Rule::ab ...
197	/// # state.rule(Rule::ab, \|s\| s.match_string("ab"))
198	/// }).unwrap().next().unwrap();
199	///
200	/// assert_eq!(pair.as_span().as_str(), "ab");
201	/// ```
202	#[inline]
203	pub fn as_span(&self) -> Span<'i> {
204	let start = self.pos(self.start);
205	let end = self.pos(self.pair());
206
207	Span::new_internal(self.input, start, end)
208	}
209
210	/// Get current node tag
211	#[inline]
212	pub fn as_node_tag(&self) -> Option<&str> {
213	match &self.queue[self.pair()] {
214	QueueableToken::End { tag, .. } => tag.as_ref().map(\|x\| x.borrow()),
215	_ => None,
216	}
217	}
218
219	/// Returns the inner `Pairs` between the `Pair`, consuming it.
220	///
221	/// # Examples
222	///
223	/// ```
224	/// # use std::rc::Rc;
225	/// # use pest;
226	/// # #[allow(non_camel_case_types)]
227	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
228	/// enum Rule {
229	/// a
230	/// }
231	///
232	/// let input = "";
233	/// let pair = pest::state(input, \|state\| {
234	/// // generating Token pair with Rule::a ...
235	/// # state.rule(Rule::a, \|s\| Ok(s))
236	/// }).unwrap().next().unwrap();
237	///
238	/// assert!(pair.into_inner().next().is_none());
239	/// ```
240	#[inline]
241	pub fn into_inner(self) -> Pairs<'i, R> {
242	let pair = self.pair();
243
244	pairs::new(
245	self.queue,
246	self.input,
247	Some(self.line_index),
248	self.start + `1`,
249	pair,
250	)
251	}
252
253	/// Returns the `Tokens` for the `Pair`.
254	///
255	/// # Examples
256	///
257	/// ```
258	/// # use std::rc::Rc;
259	/// # use pest;
260	/// # #[allow(non_camel_case_types)]
261	/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
262	/// enum Rule {
263	/// a
264	/// }
265	///
266	/// let input = "";
267	/// let pair = pest::state(input, \|state\| {
268	/// // generating Token pair with Rule::a ...
269	/// # state.rule(Rule::a, \|s\| Ok(s))
270	/// }).unwrap().next().unwrap();
271	/// let tokens: Vec<_> = pair.tokens().collect();
272	///
273	/// assert_eq!(tokens.len(), `2`);
274	/// ```
275	#[inline]
276	pub fn tokens(self) -> Tokens<'i, R> {
277	let end = self.pair();
278
279	tokens::new(self.queue, self.input, self.start, end + `1`)
280	}
281
282	/// Generates a string that stores the lexical information of `self` in
283	/// a pretty-printed JSON format.
284	#[cfg(feature = "pretty-print")]
285	pub fn to_json(&self) -> String {
286	::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
287	}
288
289	/// Returns the `line`, `col` of this pair start.
290	pub fn line_col(&self) -> (usize, usize) {
291	let pos = self.pos(self.start);
292	self.line_index.line_col(self.input, pos)
293	}
294
295	fn pair(&self) -> usize {
296	match self.queue[self.start] {
297	QueueableToken::Start {
298	end_token_index, ..
299	} => end_token_index,
300	_ => unreachable!(),
301	}
302	}
303
304	fn pos(&self, index: usize) -> usize {
305	match self.queue[index] {
306	QueueableToken::Start { input_pos, .. } \| QueueableToken::End { input_pos, .. } => {
307	input_pos
308	}
309	}
310	}
311	}
312
313	impl<'i, R: RuleType> Pairs<'i, R> {
314	/// Create a new `Pairs` iterator containing just the single `Pair`.
315	pub fn single(pair: Pair<'i, R>) -> Self {
316	let end: usize = pair.pair();
317	pairs::new(
318	pair.queue,
319	pair.input,
320	line_index:Some(pair.line_index),
321	pair.start,
322	end,
323	)
324	}
325	}
326
327	impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
328	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
329	let pair: &mut DebugStruct<'_, '_> = &mut f.debug_struct(name:"Pair");
330	pair.field(name:"rule", &self.as_rule());
331	// In order not to break compatibility
332	if let Some(s: &str) = self.as_node_tag() {
333	pair.field(name:"node_tag", &s);
334	}
335	pair&mut DebugStruct<'_, '_>.field("span", &self.as_span())
336	.field(name:"inner", &self.clone().into_inner().collect::<Vec<_>>())
337	.finish()
338	}
339	}
340
341	impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
342	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
343	let rule: R = self.as_rule();
344	let start: usize = self.pos(self.start);
345	let end: usize = self.pos(self.pair());
346	let mut pairs: impl Iterator> = self.clone().into_inner().peekable();
347
348	if pairs.peek().is_none() {
349	write!(f, "{:?}({}, {})", rule, start, end)
350	} else {
351	write!(
352	f,
353	"{:?}({}, {}, [{}])",
354	rule,
355	start,
356	end,
357	pairs
358	.map(\|pair\| format!("{}", pair))
359	.collect::<Vec<_>>()
360	.join(", ")
361	)
362	}
363	}
364	}
365
366	impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
367	fn eq(&self, other: &Pair<'i, R>) -> bool {
368	Rc::ptr_eq(&self.queue, &other.queue)
369	&& ptr::eq(self.input, b:other.input)
370	&& self.start == other.start
371	}
372	}
373
374	impl<'i, R: Eq> Eq for Pair<'i, R> {}
375
376	impl<'i, R: Hash> Hash for Pair<'i, R> {
377	fn hash<H: Hasher>(&self, state: &mut H) {
378	(&self.queue as const Vec<QueueableToken<'i, R>>).hash(state);
379	(self.input as *const str).hash(state);
380	self.start.hash(state);
381	}
382	}
383
/// Serializes a pair as a struct named `"Pairs"` with three fields:
///
/// * `pos` - the `(start, end)` input positions of the pair,
/// * `rule` - the `Debug` rendering of the pair's rule,
/// * `inner` - the matched string when there are no inner pairs, otherwise the inner pairs.
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let rule = format!("{:?}", self.as_rule());
        let inner = self.clone().into_inner();

        let mut ser = serializer.serialize_struct("Pairs", 3)?;
        ser.serialize_field("pos", &(start, end))?;
        ser.serialize_field("rule", &rule)?;

        // A pair with no inner pairs is serialized with its matched text instead.
        if inner.peek().is_none() {
            ser.serialize_field("inner", &self.as_str())?;
        } else {
            ser.serialize_field("inner", &inner)?;
        }

        ser.end()
    }
}
408
#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    /// `to_json` must produce the pretty-printed (2-space indented) JSON form of a pair.
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        // The raw string is compared byte-for-byte, so its indentation is significant.
        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    /// `into_inner` must expose only the tokens nested inside the outer pair.
    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }

    /// `get_input` must return the full original input, not just the matched slice.
    #[test]
    fn get_input_of_pair() {
        let input = "abcde";
        let pair = AbcParser::parse(Rule::a, input).unwrap().next().unwrap();

        assert_eq!(input, pair.get_input());
    }
}
463