use std::ops::RangeInclusive;

use winnow::combinator::alt;
use winnow::combinator::eof;
use winnow::combinator::opt;
use winnow::combinator::repeat;
use winnow::combinator::terminated;
use winnow::prelude::*;
use winnow::token::one_of;
use winnow::token::take_while;

use crate::parser::prelude::*;

/// Reinterprets `bytes` as a `&str` without validating it in release builds.
///
/// When `debug_assertions` are enabled the bytes are validated with
/// `std::str::from_utf8`, and a failure panics with `safety_justification`
/// as the message, so a broken invariant is caught quickly under test.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the
/// caller knows that to be true.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees `bytes` is valid UTF-8 (see `# Safety`).
        std::str::from_utf8_unchecked(bytes)
    }
}

// wschar = ( %x20 /              ; Space
//            %x09 )              ; Horizontal tab
//
// Byte set accepted as inline whitespace: ASCII space and horizontal tab.
pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');

30// ws = *wschar
31pub(crate) fn ws(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
32 take_whileMap, …>, …, …, …, …, …>(range:0.., WSCHAR)
33 .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` filters out on-ASCII") })
34 .parse_next(input)
35}
36
// non-ascii = %x80-D7FF / %xE000-10FFFF
// - ASCII is 0xxxxxxx
// - First byte for UTF-8 is 11xxxxxx
// - Subsequent UTF-8 bytes are 10xxxxxx
//
// At the byte level this is every value with the high bit set.
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff;

// non-eol = %x09 / %x20-7E / non-ascii
pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
    (0x09, 0x20..=0x7E, NON_ASCII);

// comment-start-symbol = %x23 ; #
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';

50// comment = comment-start-symbol *non-eol
51pub(crate) fn comment(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> {
52 (COMMENT_START_SYMBOL, take_while(range:0.., NON_EOL))
53 .recognize()
54 .parse_next(input)
55}
56
57// newline = ( %x0A / ; LF
58// %x0D.0A ) ; CRLF
59pub(crate) fn newline(input: Input<'_>) -> IResult<Input<'_>, u8, ParserError<'_>> {
60 altimpl Parser, …>((
61 one_of(LF).value(val:b'\n'),
62 (one_of(CR), one_of(LF)).value(val:b'\n'),
63 ))
64 .parse_next(input)
65}
// Line feed (%x0A).
pub(crate) const LF: u8 = b'\n';
// Carriage return (%x0D).
pub(crate) const CR: u8 = b'\r';

69// ws-newline = *( wschar / newline )
70pub(crate) fn ws_newline(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
71 repeatMap, …, …, …, …, …>, …, …, …>, …, …, …, …, …>(
72 range:0..,
73 f:alt((newline.value(&b"\n"[..]), take_while(range:1.., WSCHAR))),
74 )
75 .map(|()| ())
76 .recognize()
77 .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` and `newline` filters out on-ASCII") })
78 .parse_next(input)
79}
80
81// ws-newlines = newline *( wschar / newline )
82pub(crate) fn ws_newlines(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
83 (newline, ws_newline)
84 .recognize()
85 .map(|b: &[u8]| unsafe {
86 from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` and `newline` filters out on-ASCII")
87 })
88 .parse_next(input)
89}
90
91// note: this rule is not present in the original grammar
92// ws-comment-newline = *( ws-newline-nonempty / comment )
93pub(crate) fn ws_comment_newline(input: Input<'_>) -> IResult<Input<'_>, &[u8], ParserError<'_>> {
94 repeatRecognize, …, …, …, …, …>, …, …, …>(
95 range:0..,
96 f:alt((
97 repeatimpl Parser, …>(
98 range:1..,
99 f:alt((take_while(range:1.., WSCHAR), newline.value(&b"\n"[..]))),
100 )
101 .map(|()| ()),
102 comment.value(()),
103 )),
104 )
105 .map(|()| ())
106 .recognize()
107 .parse_next(input)
108}
109
110// note: this rule is not present in the original grammar
111// line-ending = newline / eof
112pub(crate) fn line_ending(input: Input<'_>) -> IResult<Input<'_>, &str, ParserError<'_>> {
113 alt((newline.value(val:"\n"), eof.value(val:""))).parse_next(input)
114}
115
116// note: this rule is not present in the original grammar
117// line-trailing = ws [comment] skip-line-ending
118pub(crate) fn line_trailing(
119 input: Input<'_>,
120) -> IResult<Input<'_>, std::ops::Range<usize>, ParserError<'_>> {
121 terminated((ws, opt(comment)).span(), second:line_ending).parse_next(input)
122}
123
#[cfg(test)]
mod test {
    use super::*;

    // `ws_comment_newline` must accept every fixture and hand back exactly the
    // bytes it was given.
    #[test]
    fn trivia() {
        let inputs = [
            "",
            r#" "#,
            r#"
"#,
            r#"
# comment

# comment2


"#,
            r#"
        "#,
            r#"# comment
# comment2


   "#,
        ];
        for input in inputs {
            let parsed = ws_comment_newline.parse(new_input(input));
            assert!(parsed.is_ok(), "input {:?}: {:?}", input, parsed);
            let parsed = parsed.unwrap();
            assert_eq!(parsed, input.as_bytes());
        }
    }
}
159