// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use alloc::format;
use alloc::rc::Rc;
#[cfg(feature = "pretty-print")]
use alloc::string::String;
use alloc::vec::Vec;
use core::borrow::Borrow;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::ptr;
use core::str;

#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::line_index::LineIndex;
use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::span::Span;
use crate::RuleType;

| 31 | /// A matching pair of [`Token`]s and everything between them. |
| 32 | /// |
| 33 | /// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the |
| 34 | /// same `Rule`, with the condition that all `Token`s between them can form such pairs as well. |
| 35 | /// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in |
| 36 | /// editors. |
| 37 | /// |
| 38 | /// [`Token`]: ../enum.Token.html |
| 39 | #[derive (Clone)] |
| 40 | pub struct Pair<'i, R> { |
| 41 | queue: Rc<Vec<QueueableToken<'i, R>>>, |
| 42 | input: &'i str, |
| 43 | /// Token index into `queue`. |
| 44 | start: usize, |
| 45 | line_index: Rc<LineIndex>, |
| 46 | } |
| 47 | |
| 48 | pub fn new<'i, R: RuleType>( |
| 49 | queue: Rc<Vec<QueueableToken<'i, R>>>, |
| 50 | input: &'i str, |
| 51 | line_index: Rc<LineIndex>, |
| 52 | start: usize, |
| 53 | ) -> Pair<'i, R> { |
| 54 | Pair { |
| 55 | queue, |
| 56 | input, |
| 57 | start, |
| 58 | line_index, |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | impl<'i, R: RuleType> Pair<'i, R> { |
| 63 | /// Returns the `Rule` of the `Pair`. |
| 64 | /// |
| 65 | /// # Examples |
| 66 | /// |
| 67 | /// ``` |
| 68 | /// # use std::rc::Rc; |
| 69 | /// # use pest; |
| 70 | /// # #[allow (non_camel_case_types)] |
| 71 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 72 | /// enum Rule { |
| 73 | /// a |
| 74 | /// } |
| 75 | /// |
| 76 | /// let input = "" ; |
| 77 | /// let pair = pest::state(input, |state| { |
| 78 | /// // generating Token pair with Rule::a ... |
| 79 | /// # state.rule(Rule::a, |s| Ok(s)) |
| 80 | /// }).unwrap().next().unwrap(); |
| 81 | /// |
| 82 | /// assert_eq!(pair.as_rule(), Rule::a); |
| 83 | /// ``` |
| 84 | #[inline ] |
| 85 | pub fn as_rule(&self) -> R { |
| 86 | match self.queue[self.pair()] { |
| 87 | QueueableToken::End { rule, .. } => rule, |
| 88 | _ => unreachable!(), |
| 89 | } |
| 90 | } |
| 91 | |
| 92 | /// Captures a slice from the `&str` defined by the token `Pair`. |
| 93 | /// |
| 94 | /// # Examples |
| 95 | /// |
| 96 | /// ``` |
| 97 | /// # use std::rc::Rc; |
| 98 | /// # use pest; |
| 99 | /// # #[allow (non_camel_case_types)] |
| 100 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 101 | /// enum Rule { |
| 102 | /// ab |
| 103 | /// } |
| 104 | /// |
| 105 | /// let input = "ab" ; |
| 106 | /// let pair = pest::state(input, |state| { |
| 107 | /// // generating Token pair with Rule::ab ... |
| 108 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
| 109 | /// }).unwrap().next().unwrap(); |
| 110 | /// |
| 111 | /// assert_eq!(pair.as_str(), "ab" ); |
| 112 | /// ``` |
| 113 | #[inline ] |
| 114 | pub fn as_str(&self) -> &'i str { |
| 115 | let start = self.pos(self.start); |
| 116 | let end = self.pos(self.pair()); |
| 117 | |
| 118 | // Generated positions always come from Positions and are UTF-8 borders. |
| 119 | &self.input[start..end] |
| 120 | } |
| 121 | |
| 122 | /// Returns the input string of the `Pair`. |
| 123 | /// |
| 124 | /// This function returns the input string of the `Pair` as a `&str`. This is the source string |
| 125 | /// from which the `Pair` was created. The returned `&str` can be used to examine the contents of |
| 126 | /// the `Pair` or to perform further processing on the string. |
| 127 | /// |
| 128 | /// # Examples |
| 129 | /// |
| 130 | /// ``` |
| 131 | /// # use std::rc::Rc; |
| 132 | /// # use pest; |
| 133 | /// # #[allow (non_camel_case_types)] |
| 134 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 135 | /// enum Rule { |
| 136 | /// ab |
| 137 | /// } |
| 138 | /// |
| 139 | /// // Example: Get input string from a Pair |
| 140 | /// |
| 141 | /// let input = "ab" ; |
| 142 | /// let pair = pest::state(input, |state| { |
| 143 | /// // generating Token pair with Rule::ab ... |
| 144 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
| 145 | /// }).unwrap().next().unwrap(); |
| 146 | /// |
| 147 | /// assert_eq!(pair.as_str(), "ab" ); |
| 148 | /// assert_eq!(input, pair.get_input()); |
| 149 | /// ``` |
| 150 | pub fn get_input(&self) -> &'i str { |
| 151 | self.input |
| 152 | } |
| 153 | |
| 154 | /// Returns the `Span` defined by the `Pair`, consuming it. |
| 155 | /// |
| 156 | /// # Examples |
| 157 | /// |
| 158 | /// ``` |
| 159 | /// # use std::rc::Rc; |
| 160 | /// # use pest; |
| 161 | /// # #[allow (non_camel_case_types)] |
| 162 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 163 | /// enum Rule { |
| 164 | /// ab |
| 165 | /// } |
| 166 | /// |
| 167 | /// let input = "ab" ; |
| 168 | /// let pair = pest::state(input, |state| { |
| 169 | /// // generating Token pair with Rule::ab ... |
| 170 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
| 171 | /// }).unwrap().next().unwrap(); |
| 172 | /// |
| 173 | /// assert_eq!(pair.into_span().as_str(), "ab" ); |
| 174 | /// ``` |
| 175 | #[inline ] |
| 176 | #[deprecated (since = "2.0.0" , note = "Please use `as_span` instead" )] |
| 177 | pub fn into_span(self) -> Span<'i> { |
| 178 | self.as_span() |
| 179 | } |
| 180 | |
| 181 | /// Returns the `Span` defined by the `Pair`, **without** consuming it. |
| 182 | /// |
| 183 | /// # Examples |
| 184 | /// |
| 185 | /// ``` |
| 186 | /// # use std::rc::Rc; |
| 187 | /// # use pest; |
| 188 | /// # #[allow (non_camel_case_types)] |
| 189 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 190 | /// enum Rule { |
| 191 | /// ab |
| 192 | /// } |
| 193 | /// |
| 194 | /// let input = "ab" ; |
| 195 | /// let pair = pest::state(input, |state| { |
| 196 | /// // generating Token pair with Rule::ab ... |
| 197 | /// # state.rule(Rule::ab, |s| s.match_string("ab" )) |
| 198 | /// }).unwrap().next().unwrap(); |
| 199 | /// |
| 200 | /// assert_eq!(pair.as_span().as_str(), "ab" ); |
| 201 | /// ``` |
| 202 | #[inline ] |
| 203 | pub fn as_span(&self) -> Span<'i> { |
| 204 | let start = self.pos(self.start); |
| 205 | let end = self.pos(self.pair()); |
| 206 | |
| 207 | Span::new_internal(self.input, start, end) |
| 208 | } |
| 209 | |
| 210 | /// Get current node tag |
| 211 | #[inline ] |
| 212 | pub fn as_node_tag(&self) -> Option<&str> { |
| 213 | match &self.queue[self.pair()] { |
| 214 | QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()), |
| 215 | _ => None, |
| 216 | } |
| 217 | } |
| 218 | |
| 219 | /// Returns the inner `Pairs` between the `Pair`, consuming it. |
| 220 | /// |
| 221 | /// # Examples |
| 222 | /// |
| 223 | /// ``` |
| 224 | /// # use std::rc::Rc; |
| 225 | /// # use pest; |
| 226 | /// # #[allow (non_camel_case_types)] |
| 227 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 228 | /// enum Rule { |
| 229 | /// a |
| 230 | /// } |
| 231 | /// |
| 232 | /// let input = "" ; |
| 233 | /// let pair = pest::state(input, |state| { |
| 234 | /// // generating Token pair with Rule::a ... |
| 235 | /// # state.rule(Rule::a, |s| Ok(s)) |
| 236 | /// }).unwrap().next().unwrap(); |
| 237 | /// |
| 238 | /// assert!(pair.into_inner().next().is_none()); |
| 239 | /// ``` |
| 240 | #[inline ] |
| 241 | pub fn into_inner(self) -> Pairs<'i, R> { |
| 242 | let pair = self.pair(); |
| 243 | |
| 244 | pairs::new( |
| 245 | self.queue, |
| 246 | self.input, |
| 247 | Some(self.line_index), |
| 248 | self.start + 1, |
| 249 | pair, |
| 250 | ) |
| 251 | } |
| 252 | |
| 253 | /// Returns the `Tokens` for the `Pair`. |
| 254 | /// |
| 255 | /// # Examples |
| 256 | /// |
| 257 | /// ``` |
| 258 | /// # use std::rc::Rc; |
| 259 | /// # use pest; |
| 260 | /// # #[allow (non_camel_case_types)] |
| 261 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 262 | /// enum Rule { |
| 263 | /// a |
| 264 | /// } |
| 265 | /// |
| 266 | /// let input = "" ; |
| 267 | /// let pair = pest::state(input, |state| { |
| 268 | /// // generating Token pair with Rule::a ... |
| 269 | /// # state.rule(Rule::a, |s| Ok(s)) |
| 270 | /// }).unwrap().next().unwrap(); |
| 271 | /// let tokens: Vec<_> = pair.tokens().collect(); |
| 272 | /// |
| 273 | /// assert_eq!(tokens.len(), 2); |
| 274 | /// ``` |
| 275 | #[inline ] |
| 276 | pub fn tokens(self) -> Tokens<'i, R> { |
| 277 | let end = self.pair(); |
| 278 | |
| 279 | tokens::new(self.queue, self.input, self.start, end + 1) |
| 280 | } |
| 281 | |
| 282 | /// Generates a string that stores the lexical information of `self` in |
| 283 | /// a pretty-printed JSON format. |
| 284 | #[cfg (feature = "pretty-print" )] |
| 285 | pub fn to_json(&self) -> String { |
| 286 | ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json." ) |
| 287 | } |
| 288 | |
| 289 | /// Returns the `line`, `col` of this pair start. |
| 290 | pub fn line_col(&self) -> (usize, usize) { |
| 291 | let pos = self.pos(self.start); |
| 292 | self.line_index.line_col(self.input, pos) |
| 293 | } |
| 294 | |
| 295 | fn pair(&self) -> usize { |
| 296 | match self.queue[self.start] { |
| 297 | QueueableToken::Start { |
| 298 | end_token_index, .. |
| 299 | } => end_token_index, |
| 300 | _ => unreachable!(), |
| 301 | } |
| 302 | } |
| 303 | |
| 304 | fn pos(&self, index: usize) -> usize { |
| 305 | match self.queue[index] { |
| 306 | QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => { |
| 307 | input_pos |
| 308 | } |
| 309 | } |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | impl<'i, R: RuleType> Pairs<'i, R> { |
| 314 | /// Create a new `Pairs` iterator containing just the single `Pair`. |
| 315 | pub fn single(pair: Pair<'i, R>) -> Self { |
| 316 | let end: usize = pair.pair(); |
| 317 | pairs::new( |
| 318 | pair.queue, |
| 319 | pair.input, |
| 320 | line_index:Some(pair.line_index), |
| 321 | pair.start, |
| 322 | end, |
| 323 | ) |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> { |
| 328 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 329 | let pair: &mut DebugStruct<'_, '_> = &mut f.debug_struct(name:"Pair" ); |
| 330 | pair.field(name:"rule" , &self.as_rule()); |
| 331 | // In order not to break compatibility |
| 332 | if let Some(s: &str) = self.as_node_tag() { |
| 333 | pair.field(name:"node_tag" , &s); |
| 334 | } |
| 335 | pair&mut DebugStruct<'_, '_>.field("span" , &self.as_span()) |
| 336 | .field(name:"inner" , &self.clone().into_inner().collect::<Vec<_>>()) |
| 337 | .finish() |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | impl<'i, R: RuleType> fmt::Display for Pair<'i, R> { |
| 342 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 343 | let rule: R = self.as_rule(); |
| 344 | let start: usize = self.pos(self.start); |
| 345 | let end: usize = self.pos(self.pair()); |
| 346 | let mut pairs: impl Iterator- >
= self.clone().into_inner().peekable(); |
| 347 | |
| 348 | if pairs.peek().is_none() { |
| 349 | write!(f, " {:?}( {}, {})" , rule, start, end) |
| 350 | } else { |
| 351 | write!( |
| 352 | f, |
| 353 | " {:?}( {}, {}, [ {}])" , |
| 354 | rule, |
| 355 | start, |
| 356 | end, |
| 357 | pairs |
| 358 | .map(|pair| format!(" {}" , pair)) |
| 359 | .collect::<Vec<_>>() |
| 360 | .join(", " ) |
| 361 | ) |
| 362 | } |
| 363 | } |
| 364 | } |
| 365 | |
| 366 | impl<'i, R: PartialEq> PartialEq for Pair<'i, R> { |
| 367 | fn eq(&self, other: &Pair<'i, R>) -> bool { |
| 368 | Rc::ptr_eq(&self.queue, &other.queue) |
| 369 | && ptr::eq(self.input, b:other.input) |
| 370 | && self.start == other.start |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | impl<'i, R: Eq> Eq for Pair<'i, R> {} |
| 375 | |
| 376 | impl<'i, R: Hash> Hash for Pair<'i, R> { |
| 377 | fn hash<H: Hasher>(&self, state: &mut H) { |
| 378 | (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state); |
| 379 | (self.input as *const str).hash(state); |
| 380 | self.start.hash(state); |
| 381 | } |
| 382 | } |
| 383 | |
/// Serializes the pair as a struct named `"Pairs"` with three fields:
///
/// * `pos` - the `(start, end)` input positions of the pair,
/// * `rule` - the `Debug` representation of the pair's rule,
/// * `inner` - the matched string when the pair has no inner pairs, otherwise the inner pairs.
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let rule = format!("{:?}", self.as_rule());
        let inner = self.clone().into_inner();

        let mut ser = serializer.serialize_struct("Pairs", 3)?;
        ser.serialize_field("pos", &(start, end))?;
        ser.serialize_field("rule", &rule)?;

        // A pair without inner pairs is represented by the text it matched.
        if inner.peek().is_none() {
            ser.serialize_field("inner", &self.as_str())?;
        } else {
            ser.serialize_field("inner", &inner)?;
        }

        ser.end()
    }
}

#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    /// `to_json` output for a pair with one inner pair.
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    /// `into_inner` yields only the tokens of the nested pair.
    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }

    /// `get_input` returns the whole string that was handed to the parser.
    #[test]
    fn get_input_of_pair() {
        let input = "abcde";
        let pair = AbcParser::parse(Rule::a, input).unwrap().next().unwrap();

        assert_eq!(input, pair.get_input());
    }
}
