trivia.rs source code [crates/toml_edit/src/parser/trivia.rs]

1	use std::ops::RangeInclusive;
2
3	use winnow::combinator::alt;
4	use winnow::combinator::eof;
5	use winnow::combinator::opt;
6	use winnow::combinator::repeat;
7	use winnow::combinator::terminated;
8	use winnow::prelude::*;
9	use winnow::token::one_of;
10	use winnow::token::take_while;
11
12	use crate::parser::prelude::*;
13
/// Reinterprets `bytes` as a `&str`, validating only in debug builds.
///
/// With `debug_assertions` enabled the bytes are checked with
/// [`std::str::from_utf8`]; otherwise the check is skipped entirely.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the
/// caller knows that to be true.
///
/// # Panics
///
/// Panics with `safety_justification` as the message when `debug_assertions`
/// are enabled and `bytes` is not valid UTF-8.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees `bytes` is valid UTF-8 (see `# Safety`).
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
25
// wschar = ( %x20 /              ; Space
//            %x09 )              ; Horizontal tab
/// The two whitespace bytes, space and horizontal tab, used as a token set.
pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');
29
30	// ws = wschar*
31	pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
32	take_whileMap, …, …, …, …, …>(range:`0`.., WSCHAR)
33	.map(\|b: &[u8]\| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` filters out on-ASCII") })
34	.parse_next(input)
35	}
36
// non-ascii = %x80-D7FF / %xE000-10FFFF
// - ASCII is 0xxxxxxx
// - First byte for UTF-8 is 11xxxxxx
// - Subsequent UTF-8 bytes are 10xxxxxx
/// Every byte with the high bit set, i.e. any byte of a multi-byte UTF-8
/// sequence (leading or continuation).
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff;
42
43	// non-eol = %x09 / %x20-7E / non-ascii
44	pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
45	(`0x09`, `0x20`..=`0x7E`, NON_ASCII);
46
// comment-start-symbol = %x23 ; #
/// The byte that introduces a comment.
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';
49
50	// comment = comment-start-symbol non-eol*
51	pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
52	(COMMENT_START_SYMBOL, take_while(range:`0`.., NON_EOL))
53	.recognize()
54	.parse_next(input)
55	}
56
57	// newline = ( %x0A / ; LF
58	// %x0D.0A ) ; CRLF
59	pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> {
60	altimpl Parser<{unknown}, {unknown}, …>((
61	one_of(LF).value(val:b'`\n`'),
62	(one_of(CR), one_of(LF)).value(val:b'`\n`'),
63	))
64	.parse_next(input)
65	}
/// Line feed (`%x0A`).
pub(crate) const LF: u8 = b'\n';
/// Carriage return (`%x0D`).
pub(crate) const CR: u8 = b'\r';
68
69	// ws-newline = ( wschar / newline )*
70	pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
71	repeatMap, …, …, …, …, …>, …, …, …>, …, …, …, …, …>(
72	range:`0`..,
73	parser:alt((newline.value(&b"`\n`"[..]), take_while(range:`1`.., WSCHAR))),
74	)
75	.map(\|()\| ())
76	.recognize()
77	.map(\|b: &[u8]\| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` and `newline` filters out on-ASCII") })
78	.parse_next(input)
79	}
80
81	// ws-newlines = newline ( wschar / newline )*
82	pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
83	(newline, ws_newline)
84	.recognize()
85	.map(\|b: &[u8]\| unsafe {
86	from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` and `newline` filters out on-ASCII")
87	})
88	.parse_next(input)
89	}
90
91	// note: this rule is not present in the original grammar
92	// ws-comment-newline = ( ws-newline-nonempty / comment )*
93	pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
94	repeatRecognize, …, …, …, …>, …, …, …, …, …>, …, …, …>(
95	range:`0`..,
96	parser:alt((
97	repeatRepeat, …, …, …, …>(
98	range:`1`..,
99	parser:alt((take_while(range:`1`.., WSCHAR), newline.value(&b"`\n`"[..]))),
100	)
101	.map(\|()\| ()),
102	comment.value(()),
103	)),
104	)
105	.map(\|()\| ())
106	.recognize()
107	.parse_next(input)
108	}
109
110	// note: this rule is not present in the original grammar
111	// line-ending = newline / eof
112	pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
113	alt((newline.value(val:"`\n`"), eof.value(val:""))).parse_next(input)
114	}
115
116	// note: this rule is not present in the original grammar
117	// line-trailing = ws [comment] skip-line-ending
118	pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> {
119	terminated((ws, opt(comment)).span(), second:line_ending).parse_next(input)
120	}
121
#[cfg(test)]
mod test {
    use super::*;

    /// Each input is made up solely of whitespace, newlines and comments, so
    /// `ws_comment_newline` must consume all of it and return it verbatim.
    #[test]
    fn trivia() {
        // Escaped literals keep the exact whitespace of every case visible and
        // immune to re-indentation of this file.
        let inputs = [
            "",
            " ",
            "\n",
            "\n# comment\n\n# comment2\n\n\n",
            "\n        ",
            "# comment\n# comment2\n\n\n   ",
        ];
        for input in inputs {
            dbg!(input);
            let parsed = ws_comment_newline.parse(new_input(input));
            assert!(parsed.is_ok(), "{:?}", parsed);
            let parsed = parsed.unwrap();
            assert_eq!(parsed, input.as_bytes());
        }
    }
}
157