1 | #![deny (unreachable_pub)] |
2 | #![deny (elided_lifetimes_in_paths)] |
3 | |
4 | use std::borrow::Cow; |
5 | use std::cell::Cell; |
6 | use std::{fmt, str}; |
7 | |
8 | use nom::branch::alt; |
9 | use nom::bytes::complete::{escaped, is_not, tag, take_till}; |
10 | use nom::character::complete::{anychar, char, one_of, satisfy}; |
11 | use nom::combinator::{cut, eof, map, opt, recognize}; |
12 | use nom::error::{Error, ErrorKind, FromExternalError}; |
13 | use nom::multi::{many0_count, many1}; |
14 | use nom::sequence::{delimited, pair, preceded, terminated, tuple}; |
15 | use nom::{error_position, AsChar, InputTakeAtPosition}; |
16 | |
17 | pub mod expr; |
18 | pub use expr::Expr; |
19 | pub mod node; |
20 | pub use node::Node; |
21 | #[cfg (test)] |
22 | mod tests; |
23 | |
// Private module so that the fields of `Parsed` (and with them the `'static` lie told to the
// compiler below) cannot be touched from the rest of the crate.
mod _parsed {
    use std::cmp::PartialEq;
    use std::{fmt, mem};

    use super::node::Node;
    use super::{Ast, ParseError, Syntax};

    /// An owned template source bundled with the `Ast` that was parsed from it.
    ///
    /// The AST borrows from `source`; the borrow is stored with a `'static` lifetime
    /// internally and only handed out again with a lifetime tied to `&self`
    /// (see [`Parsed::nodes`]).
    #[derive(Default)]
    pub struct Parsed {
        // `source` must outlive `ast`, so `ast` must be declared before `source`
        ast: Ast<'static>,
        // Never read directly; it only exists to keep the text borrowed by `ast` alive.
        #[allow(dead_code)]
        source: String,
    }

    impl Parsed {
        /// Parses `source` using `syntax` and takes ownership of the source text.
        ///
        /// # Errors
        ///
        /// Returns the `ParseError` produced by `Ast::from_str` if parsing fails.
        pub fn new(source: String, syntax: &Syntax<'_>) -> Result<Self, ParseError> {
            // Self-referential borrowing: `self` will keep the source alive as `String`,
            // internally we will transmute it to `&'static str` to satisfy the compiler.
            // However, we only expose the nodes with a lifetime limited to `self`.
            //
            // SAFETY: the `&'static str` points into the heap buffer owned by `source`, which
            // is moved (not copied or reallocated) into the returned `Self`, is never mutated
            // in this module, and is declared after `ast` so it is dropped last.
            let src = unsafe { mem::transmute::<&str, &'static str>(source.as_str()) };
            let ast = Ast::from_str(src, syntax)?;
            Ok(Self { ast, source })
        }

        // The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
        /// Returns the parsed nodes, borrowed for no longer than `self`.
        pub fn nodes(&self) -> &[Node<'_>] {
            &self.ast.nodes
        }
    }

    // Only the nodes are printed; the source text is intentionally left out
    // (hence `finish_non_exhaustive`).
    impl fmt::Debug for Parsed {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.debug_struct("Parsed")
                .field("nodes", &self.ast.nodes)
                .finish_non_exhaustive()
        }
    }

    // Two `Parsed` values compare equal when their node lists are equal; the `source`
    // field itself is not compared.
    impl PartialEq for Parsed {
        fn eq(&self, other: &Self) -> bool {
            self.ast.nodes == other.ast.nodes
        }
    }
}
69 | |
70 | pub use _parsed::Parsed; |
71 | |
/// The result of parsing a template source: the list of nodes produced by `Node::many`,
/// borrowing from the source text for `'a`.
#[derive(Debug, Default)]
pub struct Ast<'a> {
    // Nodes in the order the parser produced them.
    nodes: Vec<Node<'a>>,
}
76 | |
77 | impl<'a> Ast<'a> { |
78 | pub fn from_str(src: &'a str, syntax: &Syntax<'_>) -> Result<Self, ParseError> { |
79 | let parse = |i: &'a str| Node::many(i, &State::new(syntax)); |
80 | let (input, message) = match terminated(parse, cut(eof))(src) { |
81 | Ok(("" , nodes)) => return Ok(Self { nodes }), |
82 | Ok(_) => unreachable!("eof() is not eof?" ), |
83 | Err( |
84 | nom::Err::Error(ErrorContext { input, message, .. }) |
85 | | nom::Err::Failure(ErrorContext { input, message, .. }), |
86 | ) => (input, message), |
87 | Err(nom::Err::Incomplete(_)) => return Err(ParseError("parsing incomplete" .into())), |
88 | }; |
89 | |
90 | let offset = src.len() - input.len(); |
91 | let (source_before, source_after) = src.split_at(offset); |
92 | |
93 | let source_after = match source_after.char_indices().enumerate().take(41).last() { |
94 | Some((40, (i, _))) => format!(" {:?}..." , &source_after[..i]), |
95 | _ => format!(" {source_after:?}" ), |
96 | }; |
97 | |
98 | let (row, last_line) = source_before.lines().enumerate().last().unwrap_or_default(); |
99 | let column = last_line.chars().count(); |
100 | |
101 | let msg = format!( |
102 | " {}problems parsing template source at row {}, column {} near: \n{}" , |
103 | if let Some(message) = message { |
104 | format!(" {message}\n" ) |
105 | } else { |
106 | String::new() |
107 | }, |
108 | row + 1, |
109 | column, |
110 | source_after, |
111 | ); |
112 | |
113 | Err(ParseError(msg)) |
114 | } |
115 | |
116 | pub fn nodes(&self) -> &[Node<'a>] { |
117 | &self.nodes |
118 | } |
119 | } |
120 | |
/// Error returned when a template source could not be parsed.
///
/// Wraps the fully rendered, human-readable message; `Display` prints it unchanged.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError(String);

impl std::error::Error for ParseError {}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the inner string so formatter flags (width, alignment, ...) keep working.
        let Self(message) = self;
        fmt::Display::fmt(message, f)
    }
}
131 | |
/// Result type shared by the parsers in this file: on success a pair of a `&'a str` input
/// slice and the parsed value `T` (a `&'a str` by default), on failure a `nom::Err` carrying
/// an [`ErrorContext`].
pub(crate) type ParseResult<'a, T = &'a str> = Result<(&'a str, T), nom::Err<ErrorContext<'a>>>;
133 | |
/// This type is used to handle `nom` errors and in particular to add custom error messages.
/// It is used to generate `ParseError`.
///
/// It cannot be used to replace `ParseError` because it expects a generic, which would make
/// the experience of `askama`'s users worse (since this generic is only needed for `nom`).
#[derive(Debug)]
pub(crate) struct ErrorContext<'a> {
    /// The input slice at which the error was raised.
    pub(crate) input: &'a str,
    /// Optional custom message; `None` when the error only carries a position.
    pub(crate) message: Option<Cow<'static, str>>,
}
144 | |
145 | impl<'a> nom::error::ParseError<&'a str> for ErrorContext<'a> { |
146 | fn from_error_kind(input: &'a str, _code: ErrorKind) -> Self { |
147 | Self { |
148 | input, |
149 | message: None, |
150 | } |
151 | } |
152 | |
153 | fn append(_: &'a str, _: ErrorKind, other: Self) -> Self { |
154 | other |
155 | } |
156 | } |
157 | |
158 | impl<'a, E: std::fmt::Display> FromExternalError<&'a str, E> for ErrorContext<'a> { |
159 | fn from_external_error(input: &'a str, _kind: ErrorKind, e: E) -> Self { |
160 | Self { |
161 | input, |
162 | message: Some(Cow::Owned(e.to_string())), |
163 | } |
164 | } |
165 | } |
166 | |
167 | impl<'a> ErrorContext<'a> { |
168 | pub(crate) fn from_err(error: nom::Err<Error<&'a str>>) -> nom::Err<Self> { |
169 | match error { |
170 | nom::Err::Incomplete(i: Needed) => nom::Err::Incomplete(i), |
171 | nom::Err::Failure(Error { input: &str, .. }) => nom::Err::Failure(Self { |
172 | input, |
173 | message: None, |
174 | }), |
175 | nom::Err::Error(Error { input: &str, .. }) => nom::Err::Error(Self { |
176 | input, |
177 | message: None, |
178 | }), |
179 | } |
180 | } |
181 | } |
182 | |
/// Returns `true` if `c` is one of the whitespace characters recognized by the template
/// parser: space, tab, carriage return or line feed.
fn is_ws(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\r' | '\n')
}
186 | |
187 | fn not_ws(c: char) -> bool { |
188 | !is_ws(c) |
189 | } |
190 | |
191 | fn ws<'a, O>( |
192 | inner: impl FnMut(&'a str) -> ParseResult<'a, O>, |
193 | ) -> impl FnMut(&'a str) -> ParseResult<'a, O> { |
194 | delimited(first:take_till(not_ws), second:inner, third:take_till(cond:not_ws)) |
195 | } |
196 | |
197 | /// Skips input until `end` was found, but does not consume it. |
198 | /// Returns tuple that would be returned when parsing `end`. |
199 | fn skip_till<'a, O>( |
200 | end: impl FnMut(&'a str) -> ParseResult<'a, O>, |
201 | ) -> impl FnMut(&'a str) -> ParseResult<'a, (&'a str, O)> { |
202 | enum Next<O> { |
203 | IsEnd(O), |
204 | NotEnd(char), |
205 | } |
206 | let mut next: impl FnMut(&str) -> Result<…, …> = alt((map(parser:end, f:Next::IsEnd), map(parser:anychar, f:Next::NotEnd))); |
207 | move |start: &'a str| { |
208 | let mut i: &str = start; |
209 | loop { |
210 | let (j: &str, is_end: Next) = next(i)?; |
211 | match is_end { |
212 | Next::IsEnd(lookahead: O) => return Ok((i, (j, lookahead))), |
213 | Next::NotEnd(_) => i = j, |
214 | } |
215 | } |
216 | } |
217 | } |
218 | |
219 | fn keyword<'a>(k: &'a str) -> impl FnMut(&'a str) -> ParseResult<'_> { |
220 | move |i: &'a str| -> ParseResult<'a> { |
221 | let (j: &str, v: &str) = identifier(input:i)?; |
222 | if k == v { |
223 | Ok((j, v)) |
224 | } else { |
225 | Err(nom::Err::Error(error_position!(i, ErrorKind::Tag))) |
226 | } |
227 | } |
228 | } |
229 | |
230 | fn identifier(input: &str) -> ParseResult<'_> { |
231 | fn start(s: &str) -> ParseResult<'_> { |
232 | s.split_at_position1_complete( |
233 | |c| !(c.is_alpha() || c == '_' || c >= ' \u{0080}' ), |
234 | e:nom::error::ErrorKind::Alpha, |
235 | ) |
236 | } |
237 | |
238 | fn tail(s: &str) -> ParseResult<'_> { |
239 | s.split_at_position1_complete( |
240 | |c| !(c.is_alphanum() || c == '_' || c >= ' \u{0080}' ), |
241 | e:nom::error::ErrorKind::Alpha, |
242 | ) |
243 | } |
244 | |
245 | recognize(parser:pair(first:start, second:opt(tail)))(input) |
246 | } |
247 | |
248 | fn bool_lit(i: &str) -> ParseResult<'_> { |
249 | alt((keyword("false" ), keyword("true" )))(i) |
250 | } |
251 | |
252 | fn num_lit(i: &str) -> ParseResult<'_> { |
253 | let integer_suffix = |i| { |
254 | alt(( |
255 | tag("i8" ), |
256 | tag("i16" ), |
257 | tag("i32" ), |
258 | tag("i64" ), |
259 | tag("i128" ), |
260 | tag("isize" ), |
261 | tag("u8" ), |
262 | tag("u16" ), |
263 | tag("u32" ), |
264 | tag("u64" ), |
265 | tag("u128" ), |
266 | tag("usize" ), |
267 | ))(i) |
268 | }; |
269 | let float_suffix = |i| alt((tag("f32" ), tag("f64" )))(i); |
270 | |
271 | recognize(tuple(( |
272 | opt(char('-' )), |
273 | alt(( |
274 | recognize(tuple(( |
275 | char('0' ), |
276 | alt(( |
277 | recognize(tuple((char('b' ), separated_digits(2, false)))), |
278 | recognize(tuple((char('o' ), separated_digits(8, false)))), |
279 | recognize(tuple((char('x' ), separated_digits(16, false)))), |
280 | )), |
281 | opt(integer_suffix), |
282 | ))), |
283 | recognize(tuple(( |
284 | separated_digits(10, true), |
285 | opt(alt(( |
286 | integer_suffix, |
287 | float_suffix, |
288 | recognize(tuple(( |
289 | opt(tuple((char('.' ), separated_digits(10, true)))), |
290 | one_of("eE" ), |
291 | opt(one_of("+-" )), |
292 | separated_digits(10, false), |
293 | opt(float_suffix), |
294 | ))), |
295 | recognize(tuple(( |
296 | char('.' ), |
297 | separated_digits(10, true), |
298 | opt(float_suffix), |
299 | ))), |
300 | ))), |
301 | ))), |
302 | )), |
303 | )))(i) |
304 | } |
305 | |
306 | /// Underscore separated digits of the given base, unless `start` is true this may start |
307 | /// with an underscore. |
308 | fn separated_digits(radix: u32, start: bool) -> impl Fn(&str) -> ParseResult<'_> { |
309 | move |i: &str| { |
310 | recognize(parser:tuple(( |
311 | |i: &str| match start { |
312 | true => Ok((i, 0)), |
313 | false => many0_count(char('_' ))(i), |
314 | }, |
315 | satisfy(|ch: char| ch.is_digit(radix)), |
316 | many0_count(satisfy(|ch: char| ch == '_' || ch.is_digit(radix))), |
317 | )))(i) |
318 | } |
319 | } |
320 | |
321 | fn str_lit(i: &str) -> ParseResult<'_> { |
322 | let (i: &str, s: Option<&str>) = delimited( |
323 | first:char('"' ), |
324 | second:opt(escaped(is_not(" \\\"" ), ' \\' , anychar)), |
325 | third:char('"' ), |
326 | )(i)?; |
327 | Ok((i, s.unwrap_or_default())) |
328 | } |
329 | |
330 | fn char_lit(i: &str) -> ParseResult<'_> { |
331 | let (i: &str, s: Option<&str>) = delimited( |
332 | first:char(' \'' ), |
333 | second:opt(escaped(is_not(" \\\'" ), ' \\' , anychar)), |
334 | third:char(' \'' ), |
335 | )(i)?; |
336 | Ok((i, s.unwrap_or_default())) |
337 | } |
338 | |
/// Outcome of `path_or_identifier`: either a `::`-separated path or a single identifier.
enum PathOrIdentifier<'a> {
    /// Path components in source order; an absolute path (leading `::`) starts with an
    /// empty string component.
    Path(Vec<&'a str>),
    /// A lone identifier whose first character is lowercase.
    Identifier(&'a str),
}
343 | |
344 | fn path_or_identifier(i: &str) -> ParseResult<'_, PathOrIdentifier<'_>> { |
345 | let root = ws(opt(tag("::" ))); |
346 | let tail = opt(many1(preceded(ws(tag("::" )), identifier))); |
347 | |
348 | let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?; |
349 | let rest = rest.as_deref().unwrap_or_default(); |
350 | |
351 | // The returned identifier can be assumed to be path if: |
352 | // - it is an absolute path (starts with `::`), or |
353 | // - it has multiple components (at least one `::`), or |
354 | // - the first letter is uppercase |
355 | match (root, start, rest) { |
356 | (Some(_), start, tail) => { |
357 | let mut path = Vec::with_capacity(2 + tail.len()); |
358 | path.push("" ); |
359 | path.push(start); |
360 | path.extend(rest); |
361 | Ok((i, PathOrIdentifier::Path(path))) |
362 | } |
363 | (None, name, []) if name.chars().next().map_or(true, |c| c.is_lowercase()) => { |
364 | Ok((i, PathOrIdentifier::Identifier(name))) |
365 | } |
366 | (None, start, tail) => { |
367 | let mut path = Vec::with_capacity(1 + tail.len()); |
368 | path.push(start); |
369 | path.extend(rest); |
370 | Ok((i, PathOrIdentifier::Path(path))) |
371 | } |
372 | } |
373 | } |
374 | |
/// Mutable bookkeeping shared by the parsers while a template is being parsed.
///
/// The counters live in `Cell`s so they can be updated through a shared reference.
struct State<'a> {
    // Delimiters used for blocks, expressions and comments.
    syntax: &'a Syntax<'a>,
    // Number of loops currently entered (`enter_loop` / `leave_loop`).
    loop_depth: Cell<usize>,
    // Current nesting level (`nest` / `leave`), capped by `Level::MAX_DEPTH`.
    level: Cell<Level>,
}
380 | |
381 | impl<'a> State<'a> { |
382 | fn new(syntax: &'a Syntax<'a>) -> State<'a> { |
383 | State { |
384 | syntax, |
385 | loop_depth: Cell::new(0), |
386 | level: Cell::new(Level::default()), |
387 | } |
388 | } |
389 | |
390 | fn nest<'b>(&self, i: &'b str) -> ParseResult<'b, ()> { |
391 | let (_, level) = self.level.get().nest(i)?; |
392 | self.level.set(level); |
393 | Ok((i, ())) |
394 | } |
395 | |
396 | fn leave(&self) { |
397 | self.level.set(self.level.get().leave()); |
398 | } |
399 | |
400 | fn tag_block_start<'i>(&self, i: &'i str) -> ParseResult<'i> { |
401 | tag(self.syntax.block_start)(i) |
402 | } |
403 | |
404 | fn tag_block_end<'i>(&self, i: &'i str) -> ParseResult<'i> { |
405 | tag(self.syntax.block_end)(i) |
406 | } |
407 | |
408 | fn tag_comment_start<'i>(&self, i: &'i str) -> ParseResult<'i> { |
409 | tag(self.syntax.comment_start)(i) |
410 | } |
411 | |
412 | fn tag_comment_end<'i>(&self, i: &'i str) -> ParseResult<'i> { |
413 | tag(self.syntax.comment_end)(i) |
414 | } |
415 | |
416 | fn tag_expr_start<'i>(&self, i: &'i str) -> ParseResult<'i> { |
417 | tag(self.syntax.expr_start)(i) |
418 | } |
419 | |
420 | fn tag_expr_end<'i>(&self, i: &'i str) -> ParseResult<'i> { |
421 | tag(self.syntax.expr_end)(i) |
422 | } |
423 | |
424 | fn enter_loop(&self) { |
425 | self.loop_depth.set(self.loop_depth.get() + 1); |
426 | } |
427 | |
428 | fn leave_loop(&self) { |
429 | self.loop_depth.set(self.loop_depth.get() - 1); |
430 | } |
431 | |
432 | fn is_in_loop(&self) -> bool { |
433 | self.loop_depth.get() > 0 |
434 | } |
435 | } |
436 | |
/// The delimiters that mark blocks, expressions and comments in a template source.
#[derive(Debug)]
pub struct Syntax<'a> {
    /// Opens a block, `{%` by default.
    pub block_start: &'a str,
    /// Closes a block, `%}` by default.
    pub block_end: &'a str,
    /// Opens an expression, `{{` by default.
    pub expr_start: &'a str,
    /// Closes an expression, `}}` by default.
    pub expr_end: &'a str,
    /// Opens a comment, `{#` by default.
    pub comment_start: &'a str,
    /// Closes a comment, `#}` by default.
    pub comment_end: &'a str,
}

impl Default for Syntax<'static> {
    /// Returns the default delimiters: `{% %}`, `{{ }}` and `{# #}`.
    fn default() -> Self {
        let (block_start, block_end) = ("{%", "%}");
        let (expr_start, expr_end) = ("{{", "}}");
        let (comment_start, comment_end) = ("{#", "#}");
        Self {
            block_start,
            block_end,
            expr_start,
            expr_end,
            comment_start,
            comment_end,
        }
    }
}
459 | |
/// Nesting depth counter; starts at zero and is bounded by `Level::MAX_DEPTH`.
#[derive(Clone, Copy, Default)]
pub(crate) struct Level(u8);
462 | |
463 | impl Level { |
464 | fn nest(self, i: &str) -> ParseResult<'_, Level> { |
465 | if self.0 >= Self::MAX_DEPTH { |
466 | return Err(ErrorContext::from_err(error:nom::Err::Failure(error_position!( |
467 | i, |
468 | ErrorKind::TooLarge |
469 | )))); |
470 | } |
471 | |
472 | Ok((i, Level(self.0 + 1))) |
473 | } |
474 | |
475 | fn leave(&self) -> Self { |
476 | Level(self.0 - 1) |
477 | } |
478 | |
479 | const MAX_DEPTH: u8 = 128; |
480 | } |
481 | |