lib.rs source code [crates/askama_parser/src/lib.rs]

1	#![deny(unreachable_pub)]
2	#![deny(elided_lifetimes_in_paths)]
3
4	use std::borrow::Cow;
5	use std::cell::Cell;
6	use std::{fmt, str};
7
8	use nom::branch::alt;
9	use nom::bytes::complete::{escaped, is_not, tag, take_till};
10	use nom::character::complete::{anychar, char, one_of, satisfy};
11	use nom::combinator::{cut, eof, map, opt, recognize};
12	use nom::error::{Error, ErrorKind, FromExternalError};
13	use nom::multi::{many0_count, many1};
14	use nom::sequence::{delimited, pair, preceded, terminated, tuple};
15	use nom::{error_position, AsChar, InputTakeAtPosition};
16
17	pub mod expr;
18	pub use expr::Expr;
19	pub mod node;
20	pub use node::Node;
21	#[cfg(test)]
22	mod tests;
23
24	mod _parsed {
25	use std::cmp::PartialEq;
26	use std::{fmt, mem};
27
28	use super::node::Node;
29	use super::{Ast, ParseError, Syntax};
30
31	#[derive(Default)]
32	pub struct Parsed {
33	// `source` must outlive `ast`, so `ast` must be declared before `source`
34	ast: Ast<'static>,
35	#[allow(dead_code)]
36	source: String,
37	}
38
39	impl Parsed {
40	pub fn new(source: String, syntax: &Syntax<'_>) -> Result<Self, ParseError> {
41	// Self-referential borrowing: `self` will keep the source alive as `String`,
42	// internally we will transmute it to `&'static str` to satisfy the compiler.
43	// However, we only expose the nodes with a lifetime limited to `self`.
44	let src = unsafe { mem::transmute::<&str, &'static str>(source.as_str()) };
45	let ast = Ast::from_str(src, syntax)?;
46	Ok(Self { ast, source })
47	}
48
49	// The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
50	pub fn nodes(&self) -> &[Node<'_>] {
51	&self.ast.nodes
52	}
53	}
54
55	impl fmt::Debug for Parsed {
56	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57	f.debug_struct("Parsed")
58	.field("nodes", &self.ast.nodes)
59	.finish_non_exhaustive()
60	}
61	}
62
63	impl PartialEq for Parsed {
64	fn eq(&self, other: &Self) -> bool {
65	self.ast.nodes == other.ast.nodes
66	}
67	}
68	}
69
70	pub use _parsed::Parsed;
71
72	#[derive(Debug, Default)]
73	pub struct Ast<'a> {
74	nodes: Vec<Node<'a>>,
75	}
76
77	impl<'a> Ast<'a> {
78	pub fn from_str(src: &'a str, syntax: &Syntax<'_>) -> Result<Self, ParseError> {
79	let parse = \|i: &'a str\| Node::many(i, &State::new(syntax));
80	let (input, message) = match terminated(parse, cut(eof))(src) {
81	Ok(("", nodes)) => return Ok(Self { nodes }),
82	Ok(_) => unreachable!("eof() is not eof?"),
83	Err(
84	nom::Err::Error(ErrorContext { input, message, .. })
85	\| nom::Err::Failure(ErrorContext { input, message, .. }),
86	) => (input, message),
87	Err(nom::Err::Incomplete(_)) => return Err(ParseError("parsing incomplete".into())),
88	};
89
90	let offset = src.len() - input.len();
91	let (source_before, source_after) = src.split_at(offset);
92
93	let source_after = match source_after.char_indices().enumerate().take(`41`).last() {
94	Some((`40`, (i, _))) => format!("{:?}...", &source_after[..i]),
95	_ => format!("{source_after:?}"),
96	};
97
98	let (row, last_line) = source_before.lines().enumerate().last().unwrap_or_default();
99	let column = last_line.chars().count();
100
101	let msg = format!(
102	"{}problems parsing template source at row {}, column {} near:`\n`{}",
103	if let Some(message) = message {
104	format!("{message}`\n`")
105	} else {
106	String::new()
107	},
108	row + `1`,
109	column,
110	source_after,
111	);
112
113	Err(ParseError(msg))
114	}
115
116	pub fn nodes(&self) -> &[Node<'a>] {
117	&self.nodes
118	}
119	}
120
/// Error returned when a template source cannot be parsed.
///
/// Wraps the fully rendered, human-readable message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError(String);

impl std::error::Error for ParseError {}

impl fmt::Display for ParseError {
    /// Forwards to the wrapped message, honouring the formatter's options.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(message) = self;
        fmt::Display::fmt(message, f)
    }
}
131
132	pub(crate) type ParseResult<'a, T = &'a str> = Result<(&'a str, T), nom::Err<ErrorContext<'a>>>;
133
134	/// This type is used to handle `nom` errors and in particular to add custom error messages.
135	/// It used to generate `ParserError`.
136	///
137	/// It cannot be used to replace `ParseError` because it expects a generic, which would make
138	/// `askama`'s users experience less good (since this generic is only needed for `nom`).
139	#[derive(Debug)]
140	pub(crate) struct ErrorContext<'a> {
141	pub(crate) input: &'a str,
142	pub(crate) message: Option<Cow<'static, str>>,
143	}
144
145	impl<'a> nom::error::ParseError<&'a str> for ErrorContext<'a> {
146	fn from_error_kind(input: &'a str, _code: ErrorKind) -> Self {
147	Self {
148	input,
149	message: None,
150	}
151	}
152
153	fn append(_: &'a str, _: ErrorKind, other: Self) -> Self {
154	other
155	}
156	}
157
158	impl<'a, E: std::fmt::Display> FromExternalError<&'a str, E> for ErrorContext<'a> {
159	fn from_external_error(input: &'a str, _kind: ErrorKind, e: E) -> Self {
160	Self {
161	input,
162	message: Some(Cow::Owned(e.to_string())),
163	}
164	}
165	}
166
167	impl<'a> ErrorContext<'a> {
168	pub(crate) fn from_err(error: nom::Err<Error<&'a str>>) -> nom::Err<Self> {
169	match error {
170	nom::Err::Incomplete(i: Needed) => nom::Err::Incomplete(i),
171	nom::Err::Failure(Error { input: &str, .. }) => nom::Err::Failure(Self {
172	input,
173	message: None,
174	}),
175	nom::Err::Error(Error { input: &str, .. }) => nom::Err::Error(Self {
176	input,
177	message: None,
178	}),
179	}
180	}
181	}
182
/// Returns `true` if `c` is one of the four characters treated as whitespace here:
/// space, tab, carriage return or line feed.
fn is_ws(c: char) -> bool {
    [' ', '\t', '\r', '\n'].contains(&c)
}

/// Negation of [`is_ws`], usable directly as a predicate.
fn not_ws(c: char) -> bool {
    let whitespace = is_ws(c);
    !whitespace
}
190
191	fn ws<'a, O>(
192	inner: impl FnMut(&'a str) -> ParseResult<'a, O>,
193	) -> impl FnMut(&'a str) -> ParseResult<'a, O> {
194	delimited(first:take_till(not_ws), second:inner, third:take_till(cond:not_ws))
195	}
196
197	/// Skips input until `end` was found, but does not consume it.
198	/// Returns tuple that would be returned when parsing `end`.
199	fn skip_till<'a, O>(
200	end: impl FnMut(&'a str) -> ParseResult<'a, O>,
201	) -> impl FnMut(&'a str) -> ParseResult<'a, (&'a str, O)> {
202	enum Next<O> {
203	IsEnd(O),
204	NotEnd(char),
205	}
206	let mut next: impl FnMut(&str) -> Result<…, …> = alt((map(parser:end, f:Next::IsEnd), map(parser:anychar, f:Next::NotEnd)));
207	move \|start: &'a str\| {
208	let mut i: &str = start;
209	loop {
210	let (j: &str, is_end: Next) = next(i)?;
211	match is_end {
212	Next::IsEnd(lookahead: O) => return Ok((i, (j, lookahead))),
213	Next::NotEnd(_) => i = j,
214	}
215	}
216	}
217	}
218
219	fn keyword<'a>(k: &'a str) -> impl FnMut(&'a str) -> ParseResult<'_> {
220	move \|i: &'a str\| -> ParseResult<'a> {
221	let (j: &str, v: &str) = identifier(input:i)?;
222	if k == v {
223	Ok((j, v))
224	} else {
225	Err(nom::Err::Error(error_position!(i, ErrorKind::Tag)))
226	}
227	}
228	}
229
230	fn identifier(input: &str) -> ParseResult<'_> {
231	fn start(s: &str) -> ParseResult<'_> {
232	s.split_at_position1_complete(
233	\|c\| !(c.is_alpha() \|\| c == '_' \|\| c >= '`\u{0080}`'),
234	e:nom::error::ErrorKind::Alpha,
235	)
236	}
237
238	fn tail(s: &str) -> ParseResult<'_> {
239	s.split_at_position1_complete(
240	\|c\| !(c.is_alphanum() \|\| c == '_' \|\| c >= '`\u{0080}`'),
241	e:nom::error::ErrorKind::Alpha,
242	)
243	}
244
245	recognize(parser:pair(first:start, second:opt(tail)))(input)
246	}
247
248	fn bool_lit(i: &str) -> ParseResult<'_> {
249	alt((keyword("false"), keyword("true")))(i)
250	}
251
252	fn num_lit(i: &str) -> ParseResult<'_> {
253	let integer_suffix = \|i\| {
254	alt((
255	tag("i8"),
256	tag("i16"),
257	tag("i32"),
258	tag("i64"),
259	tag("i128"),
260	tag("isize"),
261	tag("u8"),
262	tag("u16"),
263	tag("u32"),
264	tag("u64"),
265	tag("u128"),
266	tag("usize"),
267	))(i)
268	};
269	let float_suffix = \|i\| alt((tag("f32"), tag("f64")))(i);
270
271	recognize(tuple((
272	opt(char('-')),
273	alt((
274	recognize(tuple((
275	char('0'),
276	alt((
277	recognize(tuple((char('b'), separated_digits(`2`, `false`)))),
278	recognize(tuple((char('o'), separated_digits(`8`, `false`)))),
279	recognize(tuple((char('x'), separated_digits(`16`, `false`)))),
280	)),
281	opt(integer_suffix),
282	))),
283	recognize(tuple((
284	separated_digits(`10`, `true`),
285	opt(alt((
286	integer_suffix,
287	float_suffix,
288	recognize(tuple((
289	opt(tuple((char('.'), separated_digits(`10`, `true`)))),
290	one_of("eE"),
291	opt(one_of("+-")),
292	separated_digits(`10`, `false`),
293	opt(float_suffix),
294	))),
295	recognize(tuple((
296	char('.'),
297	separated_digits(`10`, `true`),
298	opt(float_suffix),
299	))),
300	))),
301	))),
302	)),
303	)))(i)
304	}
305
306	/// Underscore separated digits of the given base, unless `start` is true this may start
307	/// with an underscore.
308	fn separated_digits(radix: u32, start: bool) -> impl Fn(&str) -> ParseResult<'_> {
309	move \|i: &str\| {
310	recognize(parser:tuple((
311	\|i: &str\| match start {
312	`true` => Ok((i, `0`)),
313	`false` => many0_count(char('_'))(i),
314	},
315	satisfy(\|ch: char\| ch.is_digit(radix)),
316	many0_count(satisfy(\|ch: char\| ch == '_' \|\| ch.is_digit(radix))),
317	)))(i)
318	}
319	}
320
321	fn str_lit(i: &str) -> ParseResult<'_> {
322	let (i: &str, s: Option<&str>) = delimited(
323	first:char('"'),
324	second:opt(escaped(is_not("`\\\"`"), '`\\`', anychar)),
325	third:char('"'),
326	)(i)?;
327	Ok((i, s.unwrap_or_default()))
328	}
329
330	fn char_lit(i: &str) -> ParseResult<'_> {
331	let (i: &str, s: Option<&str>) = delimited(
332	first:char('`\'`'),
333	second:opt(escaped(is_not("`\\\'`"), '`\\`', anychar)),
334	third:char('`\'`'),
335	)(i)?;
336	Ok((i, s.unwrap_or_default()))
337	}
338
339	enum PathOrIdentifier<'a> {
340	Path(Vec<&'a str>),
341	Identifier(&'a str),
342	}
343
344	fn path_or_identifier(i: &str) -> ParseResult<'_, PathOrIdentifier<'_>> {
345	let root = ws(opt(tag("::")));
346	let tail = opt(many1(preceded(ws(tag("::")), identifier)));
347
348	let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
349	let rest = rest.as_deref().unwrap_or_default();
350
351	// The returned identifier can be assumed to be path if:
352	// - it is an absolute path (starts with `::`), or
353	// - it has multiple components (at least one `::`), or
354	// - the first letter is uppercase
355	match (root, start, rest) {
356	(Some(_), start, tail) => {
357	let mut path = Vec::with_capacity(`2` + tail.len());
358	path.push("");
359	path.push(start);
360	path.extend(rest);
361	Ok((i, PathOrIdentifier::Path(path)))
362	}
363	(None, name, []) if name.chars().next().map_or(`true`, \|c\| c.is_lowercase()) => {
364	Ok((i, PathOrIdentifier::Identifier(name)))
365	}
366	(None, start, tail) => {
367	let mut path = Vec::with_capacity(`1` + tail.len());
368	path.push(start);
369	path.extend(rest);
370	Ok((i, PathOrIdentifier::Path(path)))
371	}
372	}
373	}
374
375	struct State<'a> {
376	syntax: &'a Syntax<'a>,
377	loop_depth: Cell<usize>,
378	level: Cell<Level>,
379	}
380
381	impl<'a> State<'a> {
382	fn new(syntax: &'a Syntax<'a>) -> State<'a> {
383	State {
384	syntax,
385	loop_depth: Cell::new(`0`),
386	level: Cell::new(Level::default()),
387	}
388	}
389
390	fn nest<'b>(&self, i: &'b str) -> ParseResult<'b, ()> {
391	let (_, level) = self.level.get().nest(i)?;
392	self.level.set(level);
393	Ok((i, ()))
394	}
395
396	fn leave(&self) {
397	self.level.set(self.level.get().leave());
398	}
399
400	fn tag_block_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
401	tag(self.syntax.block_start)(i)
402	}
403
404	fn tag_block_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
405	tag(self.syntax.block_end)(i)
406	}
407
408	fn tag_comment_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
409	tag(self.syntax.comment_start)(i)
410	}
411
412	fn tag_comment_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
413	tag(self.syntax.comment_end)(i)
414	}
415
416	fn tag_expr_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
417	tag(self.syntax.expr_start)(i)
418	}
419
420	fn tag_expr_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
421	tag(self.syntax.expr_end)(i)
422	}
423
424	fn enter_loop(&self) {
425	self.loop_depth.set(self.loop_depth.get() + `1`);
426	}
427
428	fn leave_loop(&self) {
429	self.loop_depth.set(self.loop_depth.get() - `1`);
430	}
431
432	fn is_in_loop(&self) -> bool {
433	self.loop_depth.get() > `0`
434	}
435	}
436
/// The delimiters that mark blocks, expressions and comments in a template.
#[derive(Debug)]
pub struct Syntax<'a> {
    /// Opens a block; `{%` by default.
    pub block_start: &'a str,
    /// Closes a block; `%}` by default.
    pub block_end: &'a str,
    /// Opens an expression; `{{` by default.
    pub expr_start: &'a str,
    /// Closes an expression; `}}` by default.
    pub expr_end: &'a str,
    /// Opens a comment; `{#` by default.
    pub comment_start: &'a str,
    /// Closes a comment; `#}` by default.
    pub comment_end: &'a str,
}

impl Default for Syntax<'static> {
    /// Returns the default delimiters: `{% %}`, `{{ }}` and `{# #}`.
    fn default() -> Self {
        let (block_start, block_end) = ("{%", "%}");
        let (expr_start, expr_end) = ("{{", "}}");
        let (comment_start, comment_end) = ("{#", "#}");

        Self {
            block_start,
            block_end,
            expr_start,
            expr_end,
            comment_start,
            comment_end,
        }
    }
}
459
460	#[derive(Clone, Copy, Default)]
461	pub(crate) struct Level(u8);
462
463	impl Level {
464	fn nest(self, i: &str) -> ParseResult<'_, Level> {
465	if self.0 >= Self::MAX_DEPTH {
466	return Err(ErrorContext::from_err(error:nom::Err::Failure(error_position!(
467	i,
468	ErrorKind::TooLarge
469	))));
470	}
471
472	Ok((i, Level(self.0 + `1`)))
473	}
474
475	fn leave(&self) -> Self {
476	Level(self.0 - `1`)
477	}
478
479	const MAX_DEPTH: u8 = `128`;
480	}
481