// lib.rs source code [crates/rinja_parser/src/lib.rs]

// Turn on the rustdoc `doc_cfg` / `doc_auto_cfg` features only when the `docsrs` cfg is set.
#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
// Hidden lifetime parameters in paths must be written out (e.g. `Formatter<'_>`).
#![deny(elided_lifetimes_in_paths)]
// `pub` items that cannot be reached from outside the crate are rejected.
#![deny(unreachable_pub)]
4
5	use std::borrow::Cow;
6	use std::cell::Cell;
7	use std::env::current_dir;
8	use std::ops::{Deref, DerefMut};
9	use std::path::Path;
10	use std::sync::Arc;
11	use std::{fmt, str};
12
13	use nom::branch::alt;
14	use nom::bytes::complete::{escaped, is_not, tag, take_till, take_while_m_n};
15	use nom::character::complete::{anychar, char, one_of, satisfy};
16	use nom::combinator::{consumed, cut, fail, map, not, opt, recognize, value};
17	use nom::error::{ErrorKind, FromExternalError};
18	use nom::multi::{many0_count, many1};
19	use nom::sequence::{delimited, pair, preceded, tuple};
20	use nom::{AsChar, InputTakeAtPosition};
21
// Public module; `Expr` and `Filter` are re-exported at the crate root.
pub mod expr;
pub use expr::{Expr, Filter};
// Private module providing the `Splitter` trait used by `skip_till`.
mod memchr_splitter;
// Public module; `Node` is re-exported at the crate root.
pub mod node;
pub use node::Node;

// Private module; only `Target` is re-exported.
mod target;
pub use target::Target;
// Test-only module, compiled under `cfg(test)`.
#[cfg(test)]
mod tests;
32
// Private module wrapping the self-referential `Parsed` type: its fields are private to this
// module, so outside code can only reach them through the accessors below.
mod _parsed {
    use std::path::Path;
    use std::sync::Arc;
    use std::{fmt, mem};

    use super::node::Node;
    use super::{Ast, ParseError, Syntax};

    /// A template source together with the AST that was parsed from it (and borrows from it).
    pub struct Parsed {
        // `source` must outlive `ast`, so `ast` must be declared before `source`
        // (struct fields are dropped in declaration order).
        ast: Ast<'static>,
        #[allow(dead_code)]
        source: Arc<str>,
    }

    impl Parsed {
        /// Parses `source` with the given `syntax` and stores the source next to its AST.
        ///
        /// If `file_path` is `None`, it means the `source` is an inline template. Therefore, if
        /// a parsing error occurs, we won't display the path as it wouldn't be useful.
        ///
        /// # Errors
        ///
        /// Returns the [`ParseError`] produced by [`Ast::from_str`].
        pub fn new(
            source: Arc<str>,
            file_path: Option<Arc<Path>>,
            syntax: &Syntax<'_>,
        ) -> Result<Self, ParseError> {
            // Self-referential borrowing: `self` will keep the source alive as `Arc<str>`,
            // internally we will transmute it to `&'static str` to satisfy the compiler.
            // However, we only expose the nodes with a lifetime limited to `self`.
            //
            // SAFETY: `src` borrows the string data behind `source`, and `source` is moved into
            // the returned `Self` together with `ast`. The `'static` lifetime is never handed
            // out: `nodes()` shortens it to the borrow of `self`.
            let src = unsafe { mem::transmute::<&str, &'static str>(source.as_ref()) };
            let ast = Ast::from_str(src, file_path, syntax)?;
            Ok(Self { ast, source })
        }

        /// Returns the parsed nodes.
        // The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
        #[must_use]
        pub fn nodes(&self) -> &[Node<'_>] {
            &self.ast.nodes
        }

        /// Returns the template source text this value was parsed from.
        #[must_use]
        pub fn source(&self) -> &str {
            &self.source
        }
    }

    // Only the nodes are printed; the source text is left out (`finish_non_exhaustive`).
    impl fmt::Debug for Parsed {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.debug_struct("Parsed")
                .field("nodes", &self.ast.nodes)
                .finish_non_exhaustive()
        }
    }

    // Equality compares the parsed nodes only, not the source text.
    impl PartialEq for Parsed {
        fn eq(&self, other: &Self) -> bool {
            self.ast.nodes == other.ast.nodes
        }
    }

    // The default value is an empty source with a default (empty) AST.
    impl Default for Parsed {
        fn default() -> Self {
            Self {
                ast: Ast::default(),
                source: "".into(),
            }
        }
    }
}

pub use _parsed::Parsed;
101
/// The list of top-level nodes parsed from a template source that lives for `'a`.
#[derive(Debug, Default)]
pub struct Ast<'a> {
    nodes: Vec<Node<'a>>,
}
106
107	impl<'a> Ast<'a> {
108	/// If `file_path` is `None`, it means the `source` is an inline template. Therefore, if
109	/// a parsing error occurs, we won't display the path as it wouldn't be useful.
110	pub fn from_str(
111	src: &'a str,
112	file_path: Option<Arc<Path>>,
113	syntax: &Syntax<'_>,
114	) -> Result<Self, ParseError> {
115	match Node::parse_template(src, &State::new(syntax)) {
116	Ok(("", nodes)) => Ok(Self { nodes }),
117	Ok(_) \| Err(nom::Err::Incomplete(_)) => unreachable!(),
118	Err(
119	nom::Err::Error(ErrorContext { input, message, .. })
120	\| nom::Err::Failure(ErrorContext { input, message, .. }),
121	) => Err(ParseError {
122	message,
123	offset: src.len() - input.len(),
124	file_path,
125	}),
126	}
127	}
128
129	#[must_use]
130	pub fn nodes(&self) -> &[Node<'a>] {
131	&self.nodes
132	}
133	}
134
/// Struct used to wrap types with their associated "span" which is used when generating errors
/// in the code generation.
///
/// The span is stored as a `&'a str`; the wrapped value is reachable through `Deref`.
pub struct WithSpan<'a, T> {
    inner: T,
    span: &'a str,
}
141
142	impl<'a, T> WithSpan<'a, T> {
143	pub const fn new(inner: T, span: &'a str) -> Self {
144	Self { inner, span }
145	}
146
147	pub fn span(&self) -> &'a str {
148	self.span
149	}
150
151	pub fn deconstruct(self) -> (T, &'a str) {
152	let Self { inner: T, span: &'a str } = self;
153	(inner, span)
154	}
155	}
156
157	impl<'a, T> Deref for WithSpan<'a, T> {
158	type Target = T;
159
160	fn deref(&self) -> &Self::Target {
161	&self.inner
162	}
163	}
164
165	impl<'a, T> DerefMut for WithSpan<'a, T> {
166	fn deref_mut(&mut self) -> &mut Self::Target {
167	&mut self.inner
168	}
169	}
170
171	impl<'a, T: fmt::Debug> fmt::Debug for WithSpan<'a, T> {
172	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
173	write!(f, "{:?}", self.inner)
174	}
175	}
176
177	impl<'a, T: Clone> Clone for WithSpan<'a, T> {
178	fn clone(&self) -> Self {
179	Self {
180	inner: self.inner.clone(),
181	span: self.span,
182	}
183	}
184	}
185
186	impl<'a, T: PartialEq> PartialEq for WithSpan<'a, T> {
187	fn eq(&self, other: &Self) -> bool {
188	// We never want to compare the span information.
189	self.inner == other.inner
190	}
191	}
192
/// Error returned when a template source could not be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Optional custom message produced by the parser.
    pub message: Option<Cow<'static, str>>,
    /// Byte offset into the source (computed in `Ast::from_str` as `src.len() - input.len()`).
    pub offset: usize,
    /// Path of the template file, or `None` for an inline template.
    pub file_path: Option<Arc<Path>>,
}

impl std::error::Error for ParseError {}
201
202	impl fmt::Display for ParseError {
203	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204	let ParseError {
205	message: &Option>,
206	file_path: &Option>,
207	offset: &usize,
208	} = self;
209
210	if let Some(message: &Cow<'static, str>) = message {
211	writeln!(f, "{message}")?;
212	}
213
214	let path: Option = file_pathOption<&Arc>
215	.as_ref()
216	.and_then(\|path: &Arc\| Some(strip_common(&current_dir().ok()?, path)));
217	match path {
218	Some(path: String) => write!(f, "failed to parse template source`\n` --> {path}@{offset}"),
219	None => write!(f, "failed to parse template source near offset {offset}"),
220	}
221	}
222	}
223
/// A `nom` error carrying an [`ErrorContext`].
pub(crate) type ParseErr<'a> = nom::Err<ErrorContext<'a>>;
/// Result type of the parsers in this file: on success, a `(&str, T)` pair; `T` defaults to
/// `&'a str`.
pub(crate) type ParseResult<'a, T = &'a str> = Result<(&'a str, T), ParseErr<'a>>;
226
/// This type is used to handle `nom` errors and in particular to add custom error messages.
/// It is used to generate [`ParseError`].
///
/// It cannot be used to replace `ParseError` because it expects a generic, which would make
/// the experience worse for `rinja`'s users (since this generic is only needed for `nom`).
#[derive(Debug)]
pub(crate) struct ErrorContext<'a> {
    /// The input slice at which the error was reported.
    pub(crate) input: &'a str,
    /// Optional custom error message.
    pub(crate) message: Option<Cow<'static, str>>,
}
237
238	impl<'a> ErrorContext<'a> {
239	fn unclosed(kind: &str, tag: &str, i: &'a str) -> Self {
240	Self::new(message:format!("unclosed {kind}, missing {tag:?}"), input:i)
241	}
242
243	fn new(message: impl Into<Cow<'static, str>>, input: &'a str) -> Self {
244	Self {
245	input,
246	message: Some(message.into()),
247	}
248	}
249	}
250
251	impl<'a> nom::error::ParseError<&'a str> for ErrorContext<'a> {
252	fn from_error_kind(input: &'a str, _code: ErrorKind) -> Self {
253	Self {
254	input,
255	message: None,
256	}
257	}
258
259	fn append(_: &'a str, _: ErrorKind, other: Self) -> Self {
260	other
261	}
262	}
263
264	impl<'a, E: std::fmt::Display> FromExternalError<&'a str, E> for ErrorContext<'a> {
265	fn from_external_error(input: &'a str, _kind: ErrorKind, e: E) -> Self {
266	Self {
267	input,
268	message: Some(Cow::Owned(e.to_string())),
269	}
270	}
271	}
272
273	impl<'a> From<ErrorContext<'a>> for nom::Err<ErrorContext<'a>> {
274	fn from(cx: ErrorContext<'a>) -> Self {
275	Self::Failure(cx)
276	}
277	}
278
/// Returns `true` for the four whitespace characters recognised by the parser:
/// space, tab, carriage return and line feed.
fn is_ws(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\r' | '\n')
}

/// Negation of [`is_ws`], usable directly as a predicate for `take_till`.
fn not_ws(c: char) -> bool {
    !is_ws(c)
}
286
287	fn ws<'a, O>(
288	inner: impl FnMut(&'a str) -> ParseResult<'a, O>,
289	) -> impl FnMut(&'a str) -> ParseResult<'a, O> {
290	delimited(first:take_till(not_ws), second:inner, third:take_till(cond:not_ws))
291	}
292
293	/// Skips input until `end` was found, but does not consume it.
294	/// Returns tuple that would be returned when parsing `end`.
295	fn skip_till<'a, 'b, O>(
296	candidate_finder: impl crate::memchr_splitter::Splitter,
297	end: impl FnMut(&'a str) -> ParseResult<'a, O>,
298	) -> impl FnMut(&'a str) -> ParseResult<'a, (&'a str, O)> {
299	let mut next: impl FnMut(&'a str) -> Result<…, …> = alt((map(parser:end, f:Some), map(parser:anychar, \|_\| None)));
300	move \|start: &'a str\| {
301	let mut i: &'a str = start;
302	loop {
303	i = match candidate_finder.split(haystack:i) {
304	Some((_, j: &str)) => j,
305	None => return Err(nom::Err::Error(ErrorContext::new(message:"`end` not found`", input:i))),
306	};
307	i = match next(i)? {
308	(j: &'a str, Some(lookahead: O)) => return Ok((i, (j, lookahead))),
309	(j: &'a str, None) => j,
310	};
311	}
312	}
313	}
314
315	fn keyword<'a>(k: &'a str) -> impl FnMut(&'a str) -> ParseResult<'a> {
316	move \|i: &'a str\| -> ParseResult<'a> {
317	let (j: &str, v: &str) = identifier(input:i)?;
318	if k == v { Ok((j, v)) } else { fail(i) }
319	}
320	}
321
322	fn identifier(input: &str) -> ParseResult<'_> {
323	fn start(s: &str) -> ParseResult<'_> {
324	s.split_at_position1_complete(
325	\|c\| !(c.is_alpha() \|\| c == '_' \|\| c >= '`\u{0080}`'),
326	e:nom::error::ErrorKind::Alpha,
327	)
328	}
329
330	fn tail(s: &str) -> ParseResult<'_> {
331	s.split_at_position1_complete(
332	\|c\| !(c.is_alphanum() \|\| c == '_' \|\| c >= '`\u{0080}`'),
333	e:nom::error::ErrorKind::Alpha,
334	)
335	}
336
337	recognize(parser:pair(first:start, second:opt(tail)))(input)
338	}
339
340	fn bool_lit(i: &str) -> ParseResult<'_> {
341	alt((keyword("false"), keyword("true")))(i)
342	}
343
/// A numeric literal as produced by `num_lit`: the literal's text plus an optional type suffix.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Num<'a> {
    /// Integer literal text and its optional integer suffix.
    Int(&'a str, Option<IntKind>),
    /// Floating point literal text and its optional float suffix.
    Float(&'a str, Option<FloatKind>),
}
349
350	fn num_lit<'a>(start: &'a str) -> ParseResult<'a, Num<'a>> {
351	fn num_lit_suffix<'a, T: Copy>(
352	kind: &'a str,
353	list: &[(&str, T)],
354	start: &'a str,
355	i: &'a str,
356	) -> ParseResult<'a, T> {
357	let (i, suffix) = identifier(i)?;
358	if let Some(value) = list
359	.iter()
360	.copied()
361	.find_map(\|(name, value)\| (name == suffix).then_some(value))
362	{
363	Ok((i, value))
364	} else {
365	Err(nom::Err::Failure(ErrorContext::new(
366	format!("unknown {kind} suffix `{suffix}`"),
367	start,
368	)))
369	}
370	}
371
372	// Equivalent to <https://github.com/rust-lang/rust/blob/e3f909b2bbd0b10db6f164d466db237c582d3045/compiler/rustc_lexer/src/lib.rs#L587-L620>.
373	let int_with_base = pair(opt(char('-')), \|i\| {
374	let (i, (kind, base)) = consumed(preceded(
375	char('0'),
376	alt((
377	value(`2`, char('b')),
378	value(`8`, char('o')),
379	value(`16`, char('x')),
380	)),
381	))(i)?;
382	match opt(separated_digits(base, `false`))(i)? {
383	(i, Some(_)) => Ok((i, ())),
384	(_, None) => Err(nom::Err::Failure(ErrorContext::new(
385	format!("expected digits after `{kind}`"),
386	start,
387	))),
388	}
389	});
390
391	// Equivalent to <https://github.com/rust-lang/rust/blob/e3f909b2bbd0b10db6f164d466db237c582d3045/compiler/rustc_lexer/src/lib.rs#L626-L653>:
392	// no `_` directly after the decimal point `.`, or between `e` and `+/-`.
393	let float = \|i: &'a str\| -> ParseResult<'a, ()> {
394	let (i, has_dot) = opt(pair(char('.'), separated_digits(`10`, `true`)))(i)?;
395	let (i, has_exp) = opt(\|i\| {
396	let (i, (kind, op)) = pair(one_of("eE"), opt(one_of("+-")))(i)?;
397	match opt(separated_digits(`10`, op.is_none()))(i)? {
398	(i, Some(_)) => Ok((i, ())),
399	(_, None) => Err(nom::Err::Failure(ErrorContext::new(
400	format!("expected decimal digits, `+` or `-` after exponent `{kind}`"),
401	start,
402	))),
403	}
404	})(i)?;
405	match (has_dot, has_exp) {
406	(Some(_), _) \| (_, Some(())) => Ok((i, ())),
407	_ => fail(start),
408	}
409	};
410
411	let (i, num) = if let Ok((i, Some(num))) = opt(recognize(int_with_base))(start) {
412	let (i, suffix) = opt(\|i\| num_lit_suffix("integer", INTEGER_TYPES, start, i))(i)?;
413	(i, Num::Int(num, suffix))
414	} else {
415	let (i, (num, float)) = consumed(preceded(
416	pair(opt(char('-')), separated_digits(`10`, `true`)),
417	opt(float),
418	))(start)?;
419	if float.is_some() {
420	let (i, suffix) = opt(\|i\| num_lit_suffix("float", FLOAT_TYPES, start, i))(i)?;
421	(i, Num::Float(num, suffix))
422	} else {
423	let (i, suffix) = opt(\|i\| num_lit_suffix("number", NUM_TYPES, start, i))(i)?;
424	match suffix {
425	Some(NumKind::Int(kind)) => (i, Num::Int(num, Some(kind))),
426	Some(NumKind::Float(kind)) => (i, Num::Float(num, Some(kind))),
427	None => (i, Num::Int(num, None)),
428	}
429	}
430	};
431	Ok((i, num))
432	}
433
434	/// Underscore separated digits of the given base, unless `start` is true this may start
435	/// with an underscore.
436	fn separated_digits(radix: u32, start: bool) -> impl Fn(&str) -> ParseResult<'_> {
437	move \|i: &str\| {
438	recognize(parser:tuple((
439	\|i: &str\| match start {
440	`true` => Ok((i, `0`)),
441	`false` => many0_count(char('_'))(i),
442	},
443	satisfy(\|ch: char\| ch.is_digit(radix)),
444	many0_count(satisfy(\|ch: char\| ch == '_' \|\| ch.is_digit(radix))),
445	)))(i)
446	}
447	}
448
/// Prefix letter in front of a string literal: `b` or `c`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum StrPrefix {
    /// The `b` prefix.
    Binary,
    /// The `c` prefix.
    CLike,
}

impl StrPrefix {
    /// Returns the prefix as the character that is written in front of the literal.
    #[must_use]
    pub fn to_char(self) -> char {
        match self {
            StrPrefix::Binary => 'b',
            StrPrefix::CLike => 'c',
        }
    }
}

impl fmt::Display for StrPrefix {
    /// Writes the single prefix character to the formatter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let prefix = self.to_char();
        fmt::Write::write_char(f, prefix)
    }
}
472
/// A string literal as parsed by `str_lit`.
#[derive(Clone, Debug, PartialEq)]
pub struct StrLit<'a> {
    /// Optional `b` / `c` prefix.
    pub prefix: Option<StrPrefix>,
    /// The text between the double quotes, as written in the source (escapes are not processed).
    pub content: &'a str,
}
478
479	fn str_lit_without_prefix(i: &str) -> ParseResult<'_> {
480	let (i: &str, s: Option<&str>) = delimited(
481	first:char('"'),
482	second:opt(escaped(is_not("`\\\"`"), '`\\`', anychar)),
483	third:char('"'),
484	)(i)?;
485	Ok((i, s.unwrap_or_default()))
486	}
487
488	fn str_lit(i: &str) -> Result<(&str, StrLit<'_>), ParseErr<'_>> {
489	let (i: &str, (prefix: Option, content: &str)) =
490	tuple((opt(alt((char('b'), char('c')))), str_lit_without_prefix))(i)?;
491	let prefix: Option = match prefix {
492	Some('b') => Some(StrPrefix::Binary),
493	Some('c') => Some(StrPrefix::CLike),
494	_ => None,
495	};
496	Ok((i, StrLit { prefix, content }))
497	}
498
/// Prefix letter in front of a character literal.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CharPrefix {
    /// The `b` prefix.
    Binary,
}
503
/// A character literal as parsed by `char_lit`.
#[derive(Clone, Debug, PartialEq)]
pub struct CharLit<'a> {
    /// Optional `b` prefix.
    pub prefix: Option<CharPrefix>,
    /// The text between the single quotes, as written in the source (escapes are not processed).
    pub content: &'a str,
}
509
510	// Information about allowed character escapes is available at:
511	// <https://doc.rust-lang.org/reference/tokens.html#character-literals>.
512	fn char_lit(i: &str) -> Result<(&str, CharLit<'_>), ParseErr<'_>> {
513	let start = i;
514	let (i, (b_prefix, s)) = tuple((
515	opt(char('b')),
516	delimited(
517	char('`\'`'),
518	opt(escaped(is_not("`\\\'`"), '`\\`', anychar)),
519	char('`\'`'),
520	),
521	))(i)?;
522
523	let Some(s) = s else {
524	return Err(nom::Err::Failure(ErrorContext::new(
525	"empty character literal",
526	start,
527	)));
528	};
529	let Ok(("", c)) = Char::parse(s) else {
530	return Err(nom::Err::Failure(ErrorContext::new(
531	"invalid character",
532	start,
533	)));
534	};
535
536	let (nb, max_value, err1, err2) = match c {
537	Char::Literal \| Char::Escaped => {
538	return Ok((i, CharLit {
539	prefix: b_prefix.map(\|_\| CharPrefix::Binary),
540	content: s,
541	}));
542	}
543	Char::AsciiEscape(nb) => (
544	nb,
545	// `0x7F` is the maximum value for a `\x` escaped character.
546	`0x7F`,
547	"invalid character in ascii escape",
548	"must be a character in the range [`\\`x00-`\\`x7f]",
549	),
550	Char::UnicodeEscape(nb) => (
551	nb,
552	// `0x10FFFF` is the maximum value for a `\u` escaped character.
553	`0x0010_FFFF`,
554	"invalid character in unicode escape",
555	"unicode escape must be at most 10FFFF",
556	),
557	};
558
559	let Ok(nb) = u32::from_str_radix(nb, `16`) else {
560	return Err(nom::Err::Failure(ErrorContext::new(err1, start)));
561	};
562	if nb > max_value {
563	return Err(nom::Err::Failure(ErrorContext::new(err2, start)));
564	}
565
566	Ok((i, CharLit {
567	prefix: b_prefix.map(\|_\| CharPrefix::Binary),
568	content: s,
569	}))
570	}
571
/// Represents the different kinds of char declarations:
enum Char<'a> {
    /// Any character that is not escaped.
    Literal,
    /// An escaped character (like `\n`) which doesn't require any extra check.
    Escaped,
    /// Ascii escape (like `\x12`). Holds the hexadecimal digits after `\x`.
    AsciiEscape(&'a str),
    /// Unicode escape (like `\u{12}`). Holds the hexadecimal digits between the braces.
    UnicodeEscape(&'a str),
}
583
584	impl<'a> Char<'a> {
585	fn parse(i: &'a str) -> ParseResult<'a, Self> {
586	if i.chars().count() == `1` {
587	return Ok(("", Self::Literal));
588	}
589	map(
590	tuple((
591	char('`\\`'),
592	alt((
593	map(char('n'), \|_\| Self::Escaped),
594	map(char('r'), \|_\| Self::Escaped),
595	map(char('t'), \|_\| Self::Escaped),
596	map(char('`\\`'), \|_\| Self::Escaped),
597	map(char('0'), \|_\| Self::Escaped),
598	map(char('`\'`'), \|_\| Self::Escaped),
599	// Not useful but supported by rust.
600	map(char('"'), \|_\| Self::Escaped),
601	map(
602	tuple((
603	char('x'),
604	take_while_m_n(`2`, `2`, \|c: char\| c.is_ascii_hexdigit()),
605	)),
606	\|(_, s)\| Self::AsciiEscape(s),
607	),
608	map(
609	tuple((
610	tag("u{"),
611	take_while_m_n(`1`, `6`, \|c: char\| c.is_ascii_hexdigit()),
612	char('}'),
613	)),
614	\|(_, s, _)\| Self::UnicodeEscape(s),
615	),
616	)),
617	)),
618	\|(_, ch)\| ch,
619	)(i)
620	}
621	}
622
/// Result of `path_or_identifier`.
enum PathOrIdentifier<'a> {
    /// A path given as its components; an absolute path (leading `::`) starts with `""`.
    Path(Vec<&'a str>),
    /// A single identifier that is not considered a path.
    Identifier(&'a str),
}
627
628	fn path_or_identifier(i: &str) -> ParseResult<'_, PathOrIdentifier<'_>> {
629	let root = ws(opt(tag("::")));
630	let tail = opt(many1(preceded(ws(tag("::")), identifier)));
631
632	let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
633	let rest = rest.as_deref().unwrap_or_default();
634
635	// The returned identifier can be assumed to be path if:
636	// - it is an absolute path (starts with `::`), or
637	// - it has multiple components (at least one `::`), or
638	// - the first letter is uppercase
639	match (root, start, rest) {
640	(Some(_), start, tail) => {
641	let mut path = Vec::with_capacity(`2` + tail.len());
642	path.push("");
643	path.push(start);
644	path.extend(rest);
645	Ok((i, PathOrIdentifier::Path(path)))
646	}
647	(None, name, []) if name.chars().next().map_or(`true`, char::is_lowercase) => {
648	Ok((i, PathOrIdentifier::Identifier(name)))
649	}
650	(None, start, tail) => {
651	let mut path = Vec::with_capacity(`1` + tail.len());
652	path.push(start);
653	path.extend(rest);
654	Ok((i, PathOrIdentifier::Path(path)))
655	}
656	}
657	}
658
/// Mutable parser state shared (by reference) between the parsers.
struct State<'a> {
    /// The delimiters in use.
    syntax: &'a Syntax<'a>,
    /// Number of loops currently entered (see `enter_loop` / `leave_loop`).
    loop_depth: Cell<usize>,
    /// Current nesting level, bounded by `Level::MAX_DEPTH`.
    level: Cell<Level>,
}
664
665	impl<'a> State<'a> {
666	fn new(syntax: &'a Syntax<'a>) -> State<'a> {
667	State {
668	syntax,
669	loop_depth: Cell::new(`0`),
670	level: Cell::new(Level::default()),
671	}
672	}
673
674	fn nest<'b, T, F: FnOnce(&'b str) -> ParseResult<'b, T>>(
675	&self,
676	i: &'b str,
677	callback: F,
678	) -> ParseResult<'b, T> {
679	let prev_level = self.level.get();
680	let (_, level) = prev_level.nest(i)?;
681	self.level.set(level);
682	let ret = callback(i);
683	self.level.set(prev_level);
684	ret
685	}
686
687	fn tag_block_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
688	tag(self.syntax.block_start)(i)
689	}
690
691	fn tag_block_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
692	tag(self.syntax.block_end)(i)
693	}
694
695	fn tag_comment_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
696	tag(self.syntax.comment_start)(i)
697	}
698
699	fn tag_comment_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
700	tag(self.syntax.comment_end)(i)
701	}
702
703	fn tag_expr_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
704	tag(self.syntax.expr_start)(i)
705	}
706
707	fn tag_expr_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
708	tag(self.syntax.expr_end)(i)
709	}
710
711	fn enter_loop(&self) {
712	self.loop_depth.set(self.loop_depth.get() + `1`);
713	}
714
715	fn leave_loop(&self) {
716	self.loop_depth.set(self.loop_depth.get() - `1`);
717	}
718
719	fn is_in_loop(&self) -> bool {
720	self.loop_depth.get() > `0`
721	}
722	}
723
/// The set of delimiters used when parsing a template.
///
/// The wrapped [`InnerSyntax`] is private; its fields are read through `Deref`.
#[derive(Default, Hash, PartialEq, Clone, Copy)]
pub struct Syntax<'a>(InnerSyntax<'a>);
726
// This abstraction ensures that the fields are readable, but not writable.
/// The delimiters of a [`Syntax`]. The defaults are listed per field.
#[derive(Hash, PartialEq, Clone, Copy)]
pub struct InnerSyntax<'a> {
    /// Opens a block; default `{%`.
    pub block_start: &'a str,
    /// Closes a block; default `%}`.
    pub block_end: &'a str,
    /// Opens an expression; default `{{`.
    pub expr_start: &'a str,
    /// Closes an expression; default `}}`.
    pub expr_end: &'a str,
    /// Opens a comment; default `{#`.
    pub comment_start: &'a str,
    /// Closes a comment; default `#}`.
    pub comment_end: &'a str,
}
737
738	impl<'a> Deref for Syntax<'a> {
739	type Target = InnerSyntax<'a>;
740
741	#[inline]
742	fn deref(&self) -> &Self::Target {
743	&self.0
744	}
745	}
746
747	impl Default for InnerSyntax<'static> {
748	fn default() -> Self {
749	Self {
750	block_start: "{%",
751	block_end: "%}",
752	expr_start: "{{",
753	expr_end: "}}",
754	comment_start: "{#",
755	comment_end: "#}",
756	}
757	}
758	}
759
760	impl<'a> fmt::Debug for Syntax<'a> {
761	#[inline]
762	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
763	fmt_syntax(name:"Syntax", self, f)
764	}
765	}
766
767	impl<'a> fmt::Debug for InnerSyntax<'a> {
768	#[inline]
769	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
770	fmt_syntax(name:"InnerSyntax", self, f)
771	}
772	}
773
774	fn fmt_syntax(name: &str, inner: &InnerSyntax<'_>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
775	f&mut DebugStruct<'_, '_>.debug_struct(name)
776	.field("block_start", &inner.block_start)
777	.field("block_end", &inner.block_end)
778	.field("expr_start", &inner.expr_start)
779	.field("expr_end", &inner.expr_end)
780	.field("comment_start", &inner.comment_start)
781	.field(name:"comment_end", &inner.comment_end)
782	.finish()
783	}
784
/// User-supplied delimiter overrides from which a validated [`Syntax`] is built with
/// `to_syntax`. Every delimiter left as `None` falls back to its default.
#[derive(Debug, Default, Clone, Copy, Hash, PartialEq)]
#[cfg_attr(feature = "config", derive(serde::Deserialize))]
pub struct SyntaxBuilder<'a> {
    /// Name of the syntax (not used by `to_syntax`).
    pub name: &'a str,
    /// Override for the block opening delimiter.
    pub block_start: Option<&'a str>,
    /// Override for the block closing delimiter.
    pub block_end: Option<&'a str>,
    /// Override for the expression opening delimiter.
    pub expr_start: Option<&'a str>,
    /// Override for the expression closing delimiter.
    pub expr_end: Option<&'a str>,
    /// Override for the comment opening delimiter.
    pub comment_start: Option<&'a str>,
    /// Override for the comment closing delimiter.
    pub comment_end: Option<&'a str>,
}
796
797	impl<'a> SyntaxBuilder<'a> {
798	pub fn to_syntax(&self) -> Result<Syntax<'a>, String> {
799	let default = InnerSyntax::default();
800	let syntax = Syntax(InnerSyntax {
801	block_start: self.block_start.unwrap_or(default.block_start),
802	block_end: self.block_end.unwrap_or(default.block_end),
803	expr_start: self.expr_start.unwrap_or(default.expr_start),
804	expr_end: self.expr_end.unwrap_or(default.expr_end),
805	comment_start: self.comment_start.unwrap_or(default.comment_start),
806	comment_end: self.comment_end.unwrap_or(default.comment_end),
807	});
808
809	for (s, k, is_closing) in [
810	(syntax.block_start, "opening block", `false`),
811	(syntax.block_end, "closing block", `true`),
812	(syntax.expr_start, "opening expression", `false`),
813	(syntax.expr_end, "closing expression", `true`),
814	(syntax.comment_start, "opening comment", `false`),
815	(syntax.comment_end, "closing comment", `true`),
816	] {
817	if s.len() < `2` {
818	return Err(format!(
819	"delimiters must be at least two characters long. \
820	The {k} delimiter ({s:?}) is too short",
821	));
822	} else if s.chars().any(char::is_whitespace) {
823	return Err(format!(
824	"delimiters may not contain white spaces. \
825	The {k} delimiter ({s:?}) contains white spaces",
826	));
827	} else if is_closing
828	&& ['(', '-', '+', '~', '.', '>', '<', '&', '\|', '!']
829	.contains(&s.chars().next().unwrap())
830	{
831	return Err(format!(
832	"closing delimiters may not start with operators. \
833	The {k} delimiter ({s:?}) starts with operator `{}`",
834	s.chars().next().unwrap(),
835	));
836	}
837	}
838
839	for ((s1, k1), (s2, k2)) in [
840	(
841	(syntax.block_start, "block"),
842	(syntax.expr_start, "expression"),
843	),
844	(
845	(syntax.block_start, "block"),
846	(syntax.comment_start, "comment"),
847	),
848	(
849	(syntax.expr_start, "expression"),
850	(syntax.comment_start, "comment"),
851	),
852	] {
853	if s1.starts_with(s2) \|\| s2.starts_with(s1) {
854	let (s1, k1, s2, k2) = match s1.len() < s2.len() {
855	`true` => (s1, k1, s2, k2),
856	`false` => (s2, k2, s1, k1),
857	};
858	return Err(format!(
859	"an opening delimiter may not be the prefix of another delimiter. \
860	The {k1} delimiter ({s1:?}) clashes with the {k2} delimiter ({s2:?})",
861	));
862	}
863	}
864
865	Ok(syntax)
866	}
867	}
868
/// Current nesting depth of the parser; starts at 0 and is bounded by `Level::MAX_DEPTH`.
#[derive(Clone, Copy, Default)]
pub(crate) struct Level(u8);
871
872	impl Level {
873	fn nest(self, i: &str) -> ParseResult<'_, Level> {
874	if self.0 >= Self::MAX_DEPTH {
875	return Err(nom::Err::Failure(ErrorContext::new(
876	message:"your template code is too deeply nested, or last expression is too complex",
877	input:i,
878	)));
879	}
880
881	Ok((i, Level(self.0 + `1`)))
882	}
883
884	const MAX_DEPTH: u8 = `128`;
885	}
886
887	fn filter<'a>(
888	i: &'a str,
889	level: &mut Level,
890	) -> ParseResult<'a, (&'a str, Option<Vec<WithSpan<'a, Expr<'a>>>>)> {
891	let (j: &'a str, _) = take_till(cond:not_ws)(i)?;
892	let had_spaces: bool = i.len() != j.len();
893	let (j: &'a str, _) = pair(first:char('\|'), second:not(parser:char('\|')))(j)?;
894
895	if !had_spaces {
896	*level = level.nest(i)?.1;
897	cut(parser:pair(
898	first:ws(identifier),
899	second:opt(\|i: &str\| Expr::arguments(i, *level, is_template_macro:`false`)),
900	))(j)
901	} else {
902	Err(nom::Err::Failure(ErrorContext::new(
903	message:"the filter operator `\|` must not be preceded by any whitespace characters`\n`\
904	the binary OR operator is called `bitor` in rinja",
905	input:i,
906	)))
907	}
908	}
909
/// Strips from `path` the leading components it has in common with `base`.
///
/// The goal of this function is to reduce the path length based on the `base` argument
/// (generally the path where the program is running into). For example:
///
/// ```text
/// current dir: /a/b/c
/// path:        /a/b/c/d/e.txt
/// ```
///
/// `strip_common` will return `d/e.txt`.
///
/// Details:
///
/// * `path` is canonicalized first. If that fails (e.g. the path does not exist), `path` is
///   returned unchanged.
/// * `base` is used as given; it is not canonicalized.
/// * Components are compared from the front and stripping stops at the first difference, so
///   the result is whatever remains of `path` after the shared prefix.
/// * If nothing would remain, the full canonicalized path is returned instead of an empty
///   string.
#[must_use]
pub fn strip_common(base: &Path, path: &Path) -> String {
    let path = match path.canonicalize() {
        Ok(path) => path,
        Err(_) => return path.display().to_string(),
    };
    let mut components_iter = path.components().peekable();

    for current_path_component in base.components() {
        // `path` is shorter than (or equal to a prefix of) `base`: keep it in full.
        let Some(path_component) = components_iter.peek() else {
            return path.display().to_string();
        };
        if current_path_component != *path_component {
            break;
        }
        components_iter.next();
    }
    let path_parts = components_iter
        .map(|c| c.as_os_str().to_string_lossy())
        .collect::<Vec<_>>();
    if path_parts.is_empty() {
        path.display().to_string()
    } else {
        path_parts.join(std::path::MAIN_SEPARATOR_STR)
    }
}
947
/// The primitive integer types that may be used as a suffix of an integer literal
/// (see `INTEGER_TYPES` for the spelling of each suffix).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntKind {
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
}
963
/// The primitive floating point types that may be used as a suffix of a numeric literal
/// (see `FLOAT_TYPES` for the spelling of each suffix).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FloatKind {
    F16,
    F32,
    F64,
    F128,
}
971
/// Either an integer or a float suffix; element type of `NUM_TYPES`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NumKind {
    Int(IntKind),
    Float(FloatKind),
}
977
/// Primitive integer types. Also used as number suffixes.
///
/// Each entry maps the suffix as written in the source to its [`IntKind`].
const INTEGER_TYPES: &[(&str, IntKind)] = &[
    ("i8", IntKind::I8),
    ("i16", IntKind::I16),
    ("i32", IntKind::I32),
    ("i64", IntKind::I64),
    ("i128", IntKind::I128),
    ("isize", IntKind::Isize),
    ("u8", IntKind::U8),
    ("u16", IntKind::U16),
    ("u32", IntKind::U32),
    ("u64", IntKind::U64),
    ("u128", IntKind::U128),
    ("usize", IntKind::Usize),
];
993
/// Primitive floating point types. Also used as number suffixes.
///
/// Each entry maps the suffix as written in the source to its [`FloatKind`].
const FLOAT_TYPES: &[(&str, FloatKind)] = &[
    ("f16", FloatKind::F16),
    ("f32", FloatKind::F32),
    ("f64", FloatKind::F64),
    ("f128", FloatKind::F128),
];
1001
1002	/// Primitive numeric types. Also used as number suffixes.
1003	const NUM_TYPES: &[(&str, NumKind)] = &{
1004	let mut list: [(&'static str, NumKind); 16] = [("", NumKind::Int(IntKind::I8)); INTEGER_TYPES.len() + FLOAT_TYPES.len()];
1005	let mut i: usize = `0`;
1006	let mut o: usize = `0`;
1007	while i < INTEGER_TYPES.len() {
1008	let (name: &str, value: IntKind) = INTEGER_TYPES[i];
1009	list[o] = (name, NumKind::Int(value));
1010	i += `1`;
1011	o += `1`;
1012	}
1013	let mut i: usize = `0`;
1014	while i < FLOAT_TYPES.len() {
1015	let (name: &str, value: FloatKind) = FLOAT_TYPES[i];
1016	list[o] = (name, NumKind::Float(value));
1017	i += `1`;
1018	o += `1`;
1019	}
1020	list
1021	};
1022
1023	/// Complete list of named primitive types.
1024	const PRIMITIVE_TYPES: &[&str] = &{
1025	let mut list: [&'static str; 17] = [""; NUM_TYPES.len() + `1`];
1026	let mut i: usize = `0`;
1027	let mut o: usize = `0`;
1028	while i < NUM_TYPES.len() {
1029	list[o] = NUM_TYPES[i].0;
1030	i += `1`;
1031	o += `1`;
1032	}
1033	list[o] = "bool";
1034	list
1035	};
1036
// Unit tests for the helpers defined in this file. Not built on Windows.
#[cfg(not(windows))]
#[cfg(test)]
mod test {
    use std::path::Path;

    use super::*;

    #[test]
    fn test_strip_common() {
        // Full path is returned instead of empty when the entire path is in common.
        assert_eq!(strip_common(Path::new("home"), Path::new("home")), "home");

        let cwd = std::env::current_dir().expect("current_dir failed");

        // We need actual existing paths for `canonicalize` to work, so let's do that.
        let entry = cwd
            .read_dir()
            .expect("read_dir failed")
            .filter_map(std::result::Result::ok)
            .find(|f| f.path().is_file())
            .expect("no entry");

        // Since they have the complete path in common except for the folder entry name, it should
        // return only the folder entry name.
        assert_eq!(
            strip_common(&cwd, &entry.path()),
            entry.file_name().to_string_lossy()
        );

        // In this case it cannot canonicalize `/a/b/c` so it returns the path as is.
        assert_eq!(strip_common(&cwd, Path::new("/a/b/c")), "/a/b/c");
    }

    #[test]
    fn test_num_lit() {
        // Should fail.
        assert!(num_lit(".").is_err());
        // Should succeed.
        assert_eq!(
            num_lit("1.2E-02").unwrap(),
            ("", Num::Float("1.2E-02", None))
        );
        assert_eq!(num_lit("4e3").unwrap(), ("", Num::Float("4e3", None)),);
        assert_eq!(num_lit("4e+_3").unwrap(), ("", Num::Float("4e+_3", None)),);
        // Not supported because Rust wants a number before the `.`.
        assert!(num_lit(".1").is_err());
        assert!(num_lit(".1E-02").is_err());
        // A `_` directly after the `.` denotes a field.
        assert_eq!(num_lit("1._0").unwrap(), ("._0", Num::Int("1", None)));
        assert_eq!(num_lit("1_.0").unwrap(), ("", Num::Float("1_.0", None)));
        // Not supported (voluntarily because of `1..` syntax).
        assert_eq!(num_lit("1.").unwrap(), (".", Num::Int("1", None)));
        assert_eq!(num_lit("1_.").unwrap(), (".", Num::Int("1_", None)));
        assert_eq!(num_lit("1_2.").unwrap(), (".", Num::Int("1_2", None)));
        // Numbers with suffixes
        assert_eq!(
            num_lit("-1usize").unwrap(),
            ("", Num::Int("-1", Some(IntKind::Usize)))
        );
        assert_eq!(
            num_lit("123_f32").unwrap(),
            ("", Num::Float("123_", Some(FloatKind::F32)))
        );
        assert_eq!(
            num_lit("1_.2_e+_3_f64|into_isize").unwrap(),
            (
                "|into_isize",
                Num::Float("1_.2_e+_3_", Some(FloatKind::F64))
            )
        );
        assert_eq!(
            num_lit("4e3f128").unwrap(),
            ("", Num::Float("4e3", Some(FloatKind::F128))),
        );
    }

    #[test]
    fn test_char_lit() {
        let lit = |s: &'static str| crate::CharLit {
            prefix: None,
            content: s,
        };

        assert_eq!(char_lit("'a'").unwrap(), ("", lit("a")));
        assert_eq!(char_lit("'字'").unwrap(), ("", lit("字")));

        // Escaped single characters.
        assert_eq!(char_lit("'\\\"'").unwrap(), ("", lit("\\\"")));
        assert_eq!(char_lit("'\\''").unwrap(), ("", lit("\\'")));
        assert_eq!(char_lit("'\\t'").unwrap(), ("", lit("\\t")));
        assert_eq!(char_lit("'\\n'").unwrap(), ("", lit("\\n")));
        assert_eq!(char_lit("'\\r'").unwrap(), ("", lit("\\r")));
        assert_eq!(char_lit("'\\0'").unwrap(), ("", lit("\\0")));
        // Escaped ascii characters (up to `0x7F`).
        assert_eq!(char_lit("'\\x12'").unwrap(), ("", lit("\\x12")));
        assert_eq!(char_lit("'\\x02'").unwrap(), ("", lit("\\x02")));
        assert_eq!(char_lit("'\\x6a'").unwrap(), ("", lit("\\x6a")));
        assert_eq!(char_lit("'\\x7F'").unwrap(), ("", lit("\\x7F")));
        // Escaped unicode characters (up to `0x10FFFF`).
        assert_eq!(char_lit("'\\u{A}'").unwrap(), ("", lit("\\u{A}")));
        assert_eq!(char_lit("'\\u{10}'").unwrap(), ("", lit("\\u{10}")));
        assert_eq!(char_lit("'\\u{aa}'").unwrap(), ("", lit("\\u{aa}")));
        assert_eq!(char_lit("'\\u{10FFFF}'").unwrap(), ("", lit("\\u{10FFFF}")));

        // Check with `b` prefix.
        assert_eq!(
            char_lit("b'a'").unwrap(),
            ("", crate::CharLit {
                prefix: Some(crate::CharPrefix::Binary),
                content: "a"
            })
        );

        // Should fail.
        assert!(char_lit("''").is_err());
        assert!(char_lit("'\\o'").is_err());
        assert!(char_lit("'\\x'").is_err());
        assert!(char_lit("'\\x1'").is_err());
        assert!(char_lit("'\\x80'").is_err());
        assert!(char_lit("'\\u'").is_err());
        assert!(char_lit("'\\u{}'").is_err());
        assert!(char_lit("'\\u{110000}'").is_err());
    }

    #[test]
    fn test_str_lit() {
        assert_eq!(
            str_lit(r#"b"hello""#).unwrap(),
            ("", StrLit {
                prefix: Some(StrPrefix::Binary),
                content: "hello"
            })
        );
        assert_eq!(
            str_lit(r#"c"hello""#).unwrap(),
            ("", StrLit {
                prefix: Some(StrPrefix::CLike),
                content: "hello"
            })
        );
        assert!(str_lit(r#"d"hello""#).is_err());
    }
}
1180