1#![deny(unreachable_pub)]
2#![deny(elided_lifetimes_in_paths)]
3
4use std::borrow::Cow;
5use std::cell::Cell;
6use std::{fmt, str};
7
8use nom::branch::alt;
9use nom::bytes::complete::{escaped, is_not, tag, take_till};
10use nom::character::complete::{anychar, char, one_of, satisfy};
11use nom::combinator::{cut, eof, map, opt, recognize};
12use nom::error::{Error, ErrorKind, FromExternalError};
13use nom::multi::{many0_count, many1};
14use nom::sequence::{delimited, pair, preceded, terminated, tuple};
15use nom::{error_position, AsChar, InputTakeAtPosition};
16
17pub mod expr;
18pub use expr::Expr;
19pub mod node;
20pub use node::Node;
21#[cfg(test)]
22mod tests;
23
24mod _parsed {
25 use std::cmp::PartialEq;
26 use std::{fmt, mem};
27
28 use super::node::Node;
29 use super::{Ast, ParseError, Syntax};
30
31 #[derive(Default)]
32 pub struct Parsed {
33 // `source` must outlive `ast`, so `ast` must be declared before `source`
34 ast: Ast<'static>,
35 #[allow(dead_code)]
36 source: String,
37 }
38
39 impl Parsed {
40 pub fn new(source: String, syntax: &Syntax<'_>) -> Result<Self, ParseError> {
41 // Self-referential borrowing: `self` will keep the source alive as `String`,
42 // internally we will transmute it to `&'static str` to satisfy the compiler.
43 // However, we only expose the nodes with a lifetime limited to `self`.
44 let src = unsafe { mem::transmute::<&str, &'static str>(source.as_str()) };
45 let ast = Ast::from_str(src, syntax)?;
46 Ok(Self { ast, source })
47 }
48
49 // The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
50 pub fn nodes(&self) -> &[Node<'_>] {
51 &self.ast.nodes
52 }
53 }
54
55 impl fmt::Debug for Parsed {
56 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57 f.debug_struct("Parsed")
58 .field("nodes", &self.ast.nodes)
59 .finish_non_exhaustive()
60 }
61 }
62
63 impl PartialEq for Parsed {
64 fn eq(&self, other: &Self) -> bool {
65 self.ast.nodes == other.ast.nodes
66 }
67 }
68}
69
70pub use _parsed::Parsed;
71
72#[derive(Debug, Default)]
73pub struct Ast<'a> {
74 nodes: Vec<Node<'a>>,
75}
76
77impl<'a> Ast<'a> {
78 pub fn from_str(src: &'a str, syntax: &Syntax<'_>) -> Result<Self, ParseError> {
79 let parse = |i: &'a str| Node::many(i, &State::new(syntax));
80 let (input, message) = match terminated(parse, cut(eof))(src) {
81 Ok(("", nodes)) => return Ok(Self { nodes }),
82 Ok(_) => unreachable!("eof() is not eof?"),
83 Err(
84 nom::Err::Error(ErrorContext { input, message, .. })
85 | nom::Err::Failure(ErrorContext { input, message, .. }),
86 ) => (input, message),
87 Err(nom::Err::Incomplete(_)) => return Err(ParseError("parsing incomplete".into())),
88 };
89
90 let offset = src.len() - input.len();
91 let (source_before, source_after) = src.split_at(offset);
92
93 let source_after = match source_after.char_indices().enumerate().take(41).last() {
94 Some((40, (i, _))) => format!("{:?}...", &source_after[..i]),
95 _ => format!("{source_after:?}"),
96 };
97
98 let (row, last_line) = source_before.lines().enumerate().last().unwrap_or_default();
99 let column = last_line.chars().count();
100
101 let msg = format!(
102 "{}problems parsing template source at row {}, column {} near:\n{}",
103 if let Some(message) = message {
104 format!("{message}\n")
105 } else {
106 String::new()
107 },
108 row + 1,
109 column,
110 source_after,
111 );
112
113 Err(ParseError(msg))
114 }
115
116 pub fn nodes(&self) -> &[Node<'a>] {
117 &self.nodes
118 }
119}
120
/// Error returned when a template source cannot be parsed.
///
/// Wraps the complete, human-readable message; `Display` prints that message verbatim.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError(String);

impl std::error::Error for ParseError {}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fully qualified so the call does not depend on which `fmt` trait happens to be in
        // scope (and cannot become ambiguous between `Debug::fmt` and `Display::fmt`).
        fmt::Display::fmt(self.0.as_str(), f)
    }
}
131
132pub(crate) type ParseResult<'a, T = &'a str> = Result<(&'a str, T), nom::Err<ErrorContext<'a>>>;
133
/// Error type plugged into the `nom` parsers; it allows a failure to carry a custom message.
/// It is used to generate the final `ParseError`.
///
/// It cannot replace `ParseError` itself because it has a generic (lifetime) parameter, which
/// would worsen the experience of `askama`'s users (the parameter is only needed for `nom`).
#[derive(Debug)]
pub(crate) struct ErrorContext<'a> {
    /// The input slice at which the error was raised.
    pub(crate) input: &'a str,
    /// Optional custom message to show in front of the generic position report.
    pub(crate) message: Option<Cow<'static, str>>,
}
144
145impl<'a> nom::error::ParseError<&'a str> for ErrorContext<'a> {
146 fn from_error_kind(input: &'a str, _code: ErrorKind) -> Self {
147 Self {
148 input,
149 message: None,
150 }
151 }
152
153 fn append(_: &'a str, _: ErrorKind, other: Self) -> Self {
154 other
155 }
156}
157
158impl<'a, E: std::fmt::Display> FromExternalError<&'a str, E> for ErrorContext<'a> {
159 fn from_external_error(input: &'a str, _kind: ErrorKind, e: E) -> Self {
160 Self {
161 input,
162 message: Some(Cow::Owned(e.to_string())),
163 }
164 }
165}
166
167impl<'a> ErrorContext<'a> {
168 pub(crate) fn from_err(error: nom::Err<Error<&'a str>>) -> nom::Err<Self> {
169 match error {
170 nom::Err::Incomplete(i: Needed) => nom::Err::Incomplete(i),
171 nom::Err::Failure(Error { input: &str, .. }) => nom::Err::Failure(Self {
172 input,
173 message: None,
174 }),
175 nom::Err::Error(Error { input: &str, .. }) => nom::Err::Error(Self {
176 input,
177 message: None,
178 }),
179 }
180 }
181}
182
/// Returns `true` for the four characters treated as whitespace here: space, tab, carriage
/// return and line feed.
fn is_ws(c: char) -> bool {
    const WHITESPACE: [char; 4] = [' ', '\t', '\r', '\n'];
    WHITESPACE.contains(&c)
}

/// Negation of [`is_ws`], usable directly as a predicate.
fn not_ws(c: char) -> bool {
    !is_ws(c)
}
190
191fn ws<'a, O>(
192 inner: impl FnMut(&'a str) -> ParseResult<'a, O>,
193) -> impl FnMut(&'a str) -> ParseResult<'a, O> {
194 delimited(first:take_till(not_ws), second:inner, third:take_till(cond:not_ws))
195}
196
197/// Skips input until `end` was found, but does not consume it.
198/// Returns tuple that would be returned when parsing `end`.
199fn skip_till<'a, O>(
200 end: impl FnMut(&'a str) -> ParseResult<'a, O>,
201) -> impl FnMut(&'a str) -> ParseResult<'a, (&'a str, O)> {
202 enum Next<O> {
203 IsEnd(O),
204 NotEnd(char),
205 }
206 let mut next: impl FnMut(&str) -> Result<…, …> = alt((map(parser:end, f:Next::IsEnd), map(parser:anychar, f:Next::NotEnd)));
207 move |start: &'a str| {
208 let mut i: &str = start;
209 loop {
210 let (j: &str, is_end: Next) = next(i)?;
211 match is_end {
212 Next::IsEnd(lookahead: O) => return Ok((i, (j, lookahead))),
213 Next::NotEnd(_) => i = j,
214 }
215 }
216 }
217}
218
219fn keyword<'a>(k: &'a str) -> impl FnMut(&'a str) -> ParseResult<'_> {
220 move |i: &'a str| -> ParseResult<'a> {
221 let (j: &str, v: &str) = identifier(input:i)?;
222 if k == v {
223 Ok((j, v))
224 } else {
225 Err(nom::Err::Error(error_position!(i, ErrorKind::Tag)))
226 }
227 }
228}
229
230fn identifier(input: &str) -> ParseResult<'_> {
231 fn start(s: &str) -> ParseResult<'_> {
232 s.split_at_position1_complete(
233 |c| !(c.is_alpha() || c == '_' || c >= '\u{0080}'),
234 e:nom::error::ErrorKind::Alpha,
235 )
236 }
237
238 fn tail(s: &str) -> ParseResult<'_> {
239 s.split_at_position1_complete(
240 |c| !(c.is_alphanum() || c == '_' || c >= '\u{0080}'),
241 e:nom::error::ErrorKind::Alpha,
242 )
243 }
244
245 recognize(parser:pair(first:start, second:opt(tail)))(input)
246}
247
248fn bool_lit(i: &str) -> ParseResult<'_> {
249 alt((keyword("false"), keyword("true")))(i)
250}
251
252fn num_lit(i: &str) -> ParseResult<'_> {
253 let integer_suffix = |i| {
254 alt((
255 tag("i8"),
256 tag("i16"),
257 tag("i32"),
258 tag("i64"),
259 tag("i128"),
260 tag("isize"),
261 tag("u8"),
262 tag("u16"),
263 tag("u32"),
264 tag("u64"),
265 tag("u128"),
266 tag("usize"),
267 ))(i)
268 };
269 let float_suffix = |i| alt((tag("f32"), tag("f64")))(i);
270
271 recognize(tuple((
272 opt(char('-')),
273 alt((
274 recognize(tuple((
275 char('0'),
276 alt((
277 recognize(tuple((char('b'), separated_digits(2, false)))),
278 recognize(tuple((char('o'), separated_digits(8, false)))),
279 recognize(tuple((char('x'), separated_digits(16, false)))),
280 )),
281 opt(integer_suffix),
282 ))),
283 recognize(tuple((
284 separated_digits(10, true),
285 opt(alt((
286 integer_suffix,
287 float_suffix,
288 recognize(tuple((
289 opt(tuple((char('.'), separated_digits(10, true)))),
290 one_of("eE"),
291 opt(one_of("+-")),
292 separated_digits(10, false),
293 opt(float_suffix),
294 ))),
295 recognize(tuple((
296 char('.'),
297 separated_digits(10, true),
298 opt(float_suffix),
299 ))),
300 ))),
301 ))),
302 )),
303 )))(i)
304}
305
306/// Underscore separated digits of the given base, unless `start` is true this may start
307/// with an underscore.
308fn separated_digits(radix: u32, start: bool) -> impl Fn(&str) -> ParseResult<'_> {
309 move |i: &str| {
310 recognize(parser:tuple((
311 |i: &str| match start {
312 true => Ok((i, 0)),
313 false => many0_count(char('_'))(i),
314 },
315 satisfy(|ch: char| ch.is_digit(radix)),
316 many0_count(satisfy(|ch: char| ch == '_' || ch.is_digit(radix))),
317 )))(i)
318 }
319}
320
321fn str_lit(i: &str) -> ParseResult<'_> {
322 let (i: &str, s: Option<&str>) = delimited(
323 first:char('"'),
324 second:opt(escaped(is_not("\\\""), '\\', anychar)),
325 third:char('"'),
326 )(i)?;
327 Ok((i, s.unwrap_or_default()))
328}
329
330fn char_lit(i: &str) -> ParseResult<'_> {
331 let (i: &str, s: Option<&str>) = delimited(
332 first:char('\''),
333 second:opt(escaped(is_not("\\\'"), '\\', anychar)),
334 third:char('\''),
335 )(i)?;
336 Ok((i, s.unwrap_or_default()))
337}
338
/// Outcome of parsing something that is either a plain name or a `::`-separated path.
enum PathOrIdentifier<'a> {
    /// The components of a path, in source order.
    Path(Vec<&'a str>),
    /// A single plain name.
    Identifier(&'a str),
}
343
344fn path_or_identifier(i: &str) -> ParseResult<'_, PathOrIdentifier<'_>> {
345 let root = ws(opt(tag("::")));
346 let tail = opt(many1(preceded(ws(tag("::")), identifier)));
347
348 let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
349 let rest = rest.as_deref().unwrap_or_default();
350
351 // The returned identifier can be assumed to be path if:
352 // - it is an absolute path (starts with `::`), or
353 // - it has multiple components (at least one `::`), or
354 // - the first letter is uppercase
355 match (root, start, rest) {
356 (Some(_), start, tail) => {
357 let mut path = Vec::with_capacity(2 + tail.len());
358 path.push("");
359 path.push(start);
360 path.extend(rest);
361 Ok((i, PathOrIdentifier::Path(path)))
362 }
363 (None, name, []) if name.chars().next().map_or(true, |c| c.is_lowercase()) => {
364 Ok((i, PathOrIdentifier::Identifier(name)))
365 }
366 (None, start, tail) => {
367 let mut path = Vec::with_capacity(1 + tail.len());
368 path.push(start);
369 path.extend(rest);
370 Ok((i, PathOrIdentifier::Path(path)))
371 }
372 }
373}
374
375struct State<'a> {
376 syntax: &'a Syntax<'a>,
377 loop_depth: Cell<usize>,
378 level: Cell<Level>,
379}
380
381impl<'a> State<'a> {
382 fn new(syntax: &'a Syntax<'a>) -> State<'a> {
383 State {
384 syntax,
385 loop_depth: Cell::new(0),
386 level: Cell::new(Level::default()),
387 }
388 }
389
390 fn nest<'b>(&self, i: &'b str) -> ParseResult<'b, ()> {
391 let (_, level) = self.level.get().nest(i)?;
392 self.level.set(level);
393 Ok((i, ()))
394 }
395
396 fn leave(&self) {
397 self.level.set(self.level.get().leave());
398 }
399
400 fn tag_block_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
401 tag(self.syntax.block_start)(i)
402 }
403
404 fn tag_block_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
405 tag(self.syntax.block_end)(i)
406 }
407
408 fn tag_comment_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
409 tag(self.syntax.comment_start)(i)
410 }
411
412 fn tag_comment_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
413 tag(self.syntax.comment_end)(i)
414 }
415
416 fn tag_expr_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
417 tag(self.syntax.expr_start)(i)
418 }
419
420 fn tag_expr_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
421 tag(self.syntax.expr_end)(i)
422 }
423
424 fn enter_loop(&self) {
425 self.loop_depth.set(self.loop_depth.get() + 1);
426 }
427
428 fn leave_loop(&self) {
429 self.loop_depth.set(self.loop_depth.get() - 1);
430 }
431
432 fn is_in_loop(&self) -> bool {
433 self.loop_depth.get() > 0
434 }
435}
436
/// The delimiters that open and close blocks, expressions and comments in a template.
#[derive(Debug)]
pub struct Syntax<'a> {
    /// Opens a block; `{%` by default.
    pub block_start: &'a str,
    /// Closes a block; `%}` by default.
    pub block_end: &'a str,
    /// Opens an expression; `{{` by default.
    pub expr_start: &'a str,
    /// Closes an expression; `}}` by default.
    pub expr_end: &'a str,
    /// Opens a comment; `{#` by default.
    pub comment_start: &'a str,
    /// Closes a comment; `#}` by default.
    pub comment_end: &'a str,
}

impl Default for Syntax<'static> {
    /// The default delimiters: `{% %}` for blocks, `{{ }}` for expressions and `{# #}` for
    /// comments.
    fn default() -> Self {
        let (block_start, block_end) = ("{%", "%}");
        let (expr_start, expr_end) = ("{{", "}}");
        let (comment_start, comment_end) = ("{#", "#}");
        Self {
            block_start,
            block_end,
            expr_start,
            expr_end,
            comment_start,
            comment_end,
        }
    }
}
459
460#[derive(Clone, Copy, Default)]
461pub(crate) struct Level(u8);
462
463impl Level {
464 fn nest(self, i: &str) -> ParseResult<'_, Level> {
465 if self.0 >= Self::MAX_DEPTH {
466 return Err(ErrorContext::from_err(error:nom::Err::Failure(error_position!(
467 i,
468 ErrorKind::TooLarge
469 ))));
470 }
471
472 Ok((i, Level(self.0 + 1)))
473 }
474
475 fn leave(&self) -> Self {
476 Level(self.0 - 1)
477 }
478
479 const MAX_DEPTH: u8 = 128;
480}
481