1// pest. The Elegant Parser
2// Copyright (c) 2018 DragoČ™ Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! Constructs useful in prefix, postfix, and infix operator parsing with the
11//! Pratt parsing method.
12
13use core::iter::Peekable;
14use core::marker::PhantomData;
15use core::ops::BitOr;
16
17use alloc::boxed::Box;
18use alloc::collections::BTreeMap;
19
20use crate::iterators::Pair;
21use crate::RuleType;
22
23/// Associativity of an infix binary operator, used by [`Op::infix(Assoc)`].
24///
25/// [`Op::infix(Assoc)`]: struct.Op.html
26#[derive(Clone, Copy, Debug, Eq, PartialEq)]
27pub enum Assoc {
28 /// Left operator associativity. Evaluate expressions from left-to-right.
29 Left,
30 /// Right operator associativity. Evaluate expressions from right-to-left.
31 Right,
32}
33
34type Prec = u32;
35const PREC_STEP: Prec = 10;
36
37/// An operator that corresponds to a rule.
38pub struct Op<R: RuleType> {
39 rule: R,
40 affix: Affix,
41 next: Option<Box<Op<R>>>,
42}
43
44enum Affix {
45 Prefix,
46 Postfix,
47 Infix(Assoc),
48}
49
50impl<R: RuleType> Op<R> {
51 /// Defines `rule` as a prefix unary operator.
52 pub fn prefix(rule: R) -> Self {
53 Self {
54 rule,
55 affix: Affix::Prefix,
56 next: None,
57 }
58 }
59
60 /// Defines `rule` as a postfix unary operator.
61 pub fn postfix(rule: R) -> Self {
62 Self {
63 rule,
64 affix: Affix::Postfix,
65 next: None,
66 }
67 }
68
69 /// Defines `rule` as an infix binary operator with associativity `assoc`.
70 pub fn infix(rule: R, assoc: Assoc) -> Self {
71 Self {
72 rule,
73 affix: Affix::Infix(assoc),
74 next: None,
75 }
76 }
77}
78
79impl<R: RuleType> BitOr for Op<R> {
80 type Output = Self;
81
82 fn bitor(mut self, rhs: Self) -> Self {
83 fn assign_next<R: RuleType>(op: &mut Op<R>, next: Op<R>) {
84 if let Some(ref mut child: &mut Box>) = op.next {
85 assign_next(op:child, next);
86 } else {
87 op.next = Some(Box::new(next));
88 }
89 }
90
91 assign_next(&mut self, next:rhs);
92 self
93 }
94}
95
96/// Struct containing operators and precedences, which can perform [Pratt parsing][1] on
97/// primary, prefix, postfix and infix expressions over [`Pairs`]. The tokens in [`Pairs`]
98/// should alternate in the order:
99/// `prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix*)*`
100///
101/// # Panics
102///
103/// Panics will occur when:
104/// * `pairs` is empty
105/// * The tokens in `pairs` does not alternate in the expected order.
106/// * No `map_*` function is specified for a certain kind of operator encountered in `pairs`.
107///
108/// # Example
109///
110/// The following pest grammar defines a calculator which can be used for Pratt parsing.
111///
112/// ```pest
113/// WHITESPACE = _{ " " | "\t" | NEWLINE }
114///
115/// program = { SOI ~ expr ~ EOI }
116/// expr = { prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix* )* }
117/// infix = _{ add | sub | mul | div | pow }
118/// add = { "+" } // Addition
119/// sub = { "-" } // Subtraction
120/// mul = { "*" } // Multiplication
121/// div = { "/" } // Division
122/// pow = { "^" } // Exponentiation
123/// prefix = _{ neg }
124/// neg = { "-" } // Negation
125/// postfix = _{ fac }
126/// fac = { "!" } // Factorial
127/// primary = _{ int | "(" ~ expr ~ ")" }
128/// int = @{ (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT+ | ASCII_DIGIT) }
129/// ```
130///
131/// Below is a [`PrattParser`] that is able to parse an `expr` in the above grammar. The order
132/// of precedence corresponds to the order in which [`op`] is called. Thus, `mul` will
133/// have higher precedence than `add`. Operators can also be chained with `|` to give them equal
134/// precedence.
135///
136/// ```
137/// # use pest::pratt_parser::{Assoc, Op, PrattParser};
138/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
139/// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg }
140/// let pratt =
141/// PrattParser::new()
142/// .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left))
143/// .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left))
144/// .op(Op::infix(Rule::pow, Assoc::Right))
145/// .op(Op::prefix(Rule::neg))
146/// .op(Op::postfix(Rule::fac));
147/// ```
148///
149/// To parse an expression, call the [`map_primary`], [`map_prefix`], [`map_postfix`],
150/// [`map_infix`] and [`parse`] methods as follows:
151///
152/// ```
153/// # use pest::{iterators::Pairs, pratt_parser::PrattParser};
154/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
155/// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg }
156/// fn parse_expr(pairs: Pairs<Rule>, pratt: &PrattParser<Rule>) -> i32 {
157/// pratt
158/// .map_primary(|primary| match primary.as_rule() {
159/// Rule::int => primary.as_str().parse().unwrap(),
160/// Rule::expr => parse_expr(primary.into_inner(), pratt), // from "(" ~ expr ~ ")"
161/// _ => unreachable!(),
162/// })
163/// .map_prefix(|op, rhs| match op.as_rule() {
164/// Rule::neg => -rhs,
165/// _ => unreachable!(),
166/// })
167/// .map_postfix(|lhs, op| match op.as_rule() {
168/// Rule::fac => (1..lhs+1).product(),
169/// _ => unreachable!(),
170/// })
171/// .map_infix(|lhs, op, rhs| match op.as_rule() {
172/// Rule::add => lhs + rhs,
173/// Rule::sub => lhs - rhs,
174/// Rule::mul => lhs * rhs,
175/// Rule::div => lhs / rhs,
176/// Rule::pow => (1..rhs+1).map(|_| lhs).product(),
177/// _ => unreachable!(),
178/// })
179/// .parse(pairs)
180/// }
181/// ```
182///
183/// Note that [`map_prefix`], [`map_postfix`] and [`map_infix`] only need to be specified if the
184/// grammar contains the corresponding operators.
185///
186/// [1]: https://en.wikipedia.org/wiki/Pratt_parser
187/// [`Pairs`]: ../iterators/struct.Pairs.html
188/// [`PrattParser`]: struct.PrattParser.html
189/// [`map_primary`]: struct.PrattParser.html#method.map_primary
190/// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix
191/// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix
192/// [`map_infix`]: struct.PrattParserMap.html#method.map_infix
193/// [`parse`]: struct.PrattParserMap.html#method.parse
194/// [`op`]: struct.PrattParserMap.html#method.op
195pub struct PrattParser<R: RuleType> {
196 prec: Prec,
197 ops: BTreeMap<R, (Affix, Prec)>,
198 has_prefix: bool,
199 has_postfix: bool,
200 has_infix: bool,
201}
202
203impl<R: RuleType> Default for PrattParser<R> {
204 fn default() -> Self {
205 Self::new()
206 }
207}
208
209impl<R: RuleType> PrattParser<R> {
210 /// Instantiate a new `PrattParser`.
211 pub fn new() -> Self {
212 Self {
213 prec: PREC_STEP,
214 ops: BTreeMap::new(),
215 has_prefix: false,
216 has_postfix: false,
217 has_infix: false,
218 }
219 }
220
221 /// Add `op` to `PrattParser`.
222 pub fn op(mut self, op: Op<R>) -> Self {
223 self.prec += PREC_STEP;
224 let mut iter = Some(op);
225 while let Some(Op { rule, affix, next }) = iter.take() {
226 match affix {
227 Affix::Prefix => self.has_prefix = true,
228 Affix::Postfix => self.has_postfix = true,
229 Affix::Infix(_) => self.has_infix = true,
230 }
231 self.ops.insert(rule, (affix, self.prec));
232 iter = next.map(|op| *op);
233 }
234 self
235 }
236
237 /// Maps primary expressions with a closure `primary`.
238 pub fn map_primary<'pratt, 'a, 'i, X, T>(
239 &'pratt self,
240 primary: X,
241 ) -> PrattParserMap<'pratt, 'a, 'i, R, X, T>
242 where
243 X: FnMut(Pair<'i, R>) -> T,
244 R: 'pratt,
245 {
246 PrattParserMap {
247 pratt: self,
248 primary,
249 prefix: None,
250 postfix: None,
251 infix: None,
252 phantom: PhantomData,
253 }
254 }
255}
256
257type PrefixFn<'a, 'i, R, T> = Box<dyn FnMut(Pair<'i, R>, T) -> T + 'a>;
258type PostfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>) -> T + 'a>;
259type InfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>, T) -> T + 'a>;
260
261/// Product of calling [`map_primary`] on [`PrattParser`], defines how expressions should
262/// be mapped.
263///
264/// [`map_primary`]: struct.PrattParser.html#method.map_primary
265/// [`PrattParser`]: struct.PrattParser.html
266pub struct PrattParserMap<'pratt, 'a, 'i, R, F, T>
267where
268 R: RuleType,
269 F: FnMut(Pair<'i, R>) -> T,
270{
271 pratt: &'pratt PrattParser<R>,
272 primary: F,
273 prefix: Option<PrefixFn<'a, 'i, R, T>>,
274 postfix: Option<PostfixFn<'a, 'i, R, T>>,
275 infix: Option<InfixFn<'a, 'i, R, T>>,
276 phantom: PhantomData<T>,
277}
278
279impl<'pratt, 'a, 'i, R, F, T> PrattParserMap<'pratt, 'a, 'i, R, F, T>
280where
281 R: RuleType + 'pratt,
282 F: FnMut(Pair<'i, R>) -> T,
283{
284 /// Maps prefix operators with closure `prefix`.
285 pub fn map_prefix<X>(mut self, prefix: X) -> Self
286 where
287 X: FnMut(Pair<'i, R>, T) -> T + 'a,
288 {
289 self.prefix = Some(Box::new(prefix));
290 self
291 }
292
293 /// Maps postfix operators with closure `postfix`.
294 pub fn map_postfix<X>(mut self, postfix: X) -> Self
295 where
296 X: FnMut(T, Pair<'i, R>) -> T + 'a,
297 {
298 self.postfix = Some(Box::new(postfix));
299 self
300 }
301
302 /// Maps infix operators with a closure `infix`.
303 pub fn map_infix<X>(mut self, infix: X) -> Self
304 where
305 X: FnMut(T, Pair<'i, R>, T) -> T + 'a,
306 {
307 self.infix = Some(Box::new(infix));
308 self
309 }
310
311 /// The last method to call on the provided pairs to execute the Pratt
312 /// parser (previously defined using [`map_primary`], [`map_prefix`], [`map_postfix`],
313 /// and [`map_infix`] methods).
314 ///
315 /// [`map_primary`]: struct.PrattParser.html#method.map_primary
316 /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix
317 /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix
318 /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix
319 pub fn parse<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: P) -> T {
320 self.expr(&mut pairs.peekable(), 0)
321 }
322
323 fn expr<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, rbp: Prec) -> T {
324 let mut lhs = self.nud(pairs);
325 while rbp < self.lbp(pairs) {
326 lhs = self.led(pairs, lhs);
327 }
328 lhs
329 }
330
331 /// Null-Denotation
332 ///
333 /// "the action that should happen when the symbol is encountered
334 /// as start of an expression (most notably, prefix operators)
335 fn nud<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> T {
336 let pair = pairs.next().expect("Pratt parsing expects non-empty Pairs");
337 match self.pratt.ops.get(&pair.as_rule()) {
338 Some((Affix::Prefix, prec)) => {
339 let rhs = self.expr(pairs, *prec - 1);
340 match self.prefix.as_mut() {
341 Some(prefix) => prefix(pair, rhs),
342 None => panic!("Could not map {}, no `.map_prefix(...)` specified", pair),
343 }
344 }
345 None => (self.primary)(pair),
346 _ => panic!("Expected prefix or primary expression, found {}", pair),
347 }
348 }
349
350 /// Left-Denotation
351 ///
352 /// "the action that should happen when the symbol is encountered
353 /// after the start of an expression (most notably, infix and postfix operators)"
354 fn led<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, lhs: T) -> T {
355 let pair = pairs.next().unwrap();
356 match self.pratt.ops.get(&pair.as_rule()) {
357 Some((Affix::Infix(assoc), prec)) => {
358 let rhs = match *assoc {
359 Assoc::Left => self.expr(pairs, *prec),
360 Assoc::Right => self.expr(pairs, *prec - 1),
361 };
362 match self.infix.as_mut() {
363 Some(infix) => infix(lhs, pair, rhs),
364 None => panic!("Could not map {}, no `.map_infix(...)` specified", pair),
365 }
366 }
367 Some((Affix::Postfix, _)) => match self.postfix.as_mut() {
368 Some(postfix) => postfix(lhs, pair),
369 None => panic!("Could not map {}, no `.map_postfix(...)` specified", pair),
370 },
371 _ => panic!("Expected postfix or infix expression, found {}", pair),
372 }
373 }
374
375 /// Left-Binding-Power
376 ///
377 /// "describes the symbol's precedence in infix form (most notably, operator precedence)"
378 fn lbp<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> Prec {
379 match pairs.peek() {
380 Some(pair) => match self.pratt.ops.get(&pair.as_rule()) {
381 Some((_, prec)) => *prec,
382 None => panic!("Expected operator, found {}", pair),
383 },
384 None => 0,
385 }
386 }
387}
388