1 | use std::ops::RangeInclusive; |
2 | |
3 | use winnow::combinator::alt; |
4 | use winnow::combinator::eof; |
5 | use winnow::combinator::opt; |
6 | use winnow::combinator::repeat; |
7 | use winnow::combinator::terminated; |
8 | use winnow::prelude::*; |
9 | use winnow::token::one_of; |
10 | use winnow::token::take_while; |
11 | |
12 | use crate::parser::prelude::*; |
13 | |
/// Converts `bytes` to a `&str`, skipping UTF-8 validation in release builds.
///
/// In builds with `debug_assertions` enabled the bytes are validated anyway and
/// the function panics with `safety_justification` as the message if they are
/// not valid UTF-8, so that a broken invariant is caught quickly when testing.
///
/// # Safety
///
/// The caller must guarantee that `bytes` is valid UTF-8.
/// `safety_justification` should state why that holds at the call site.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: validity of `bytes` is the caller's obligation (see `# Safety`).
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
25 | |
/// Bytes accepted as in-line whitespace.
///
/// ```abnf
/// wschar =  ( %x20 /              ; Space
///             %x09 )              ; Horizontal tab
/// ```
pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');
29 | |
30 | // ws = *wschar |
31 | pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
32 | take_whileMap, …>, …, …, …, …, …>(range:0.., WSCHAR) |
33 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` filters out on-ASCII" ) }) |
34 | .parse_next(input) |
35 | } |
36 | |
/// Bytes that can only occur as part of a multi-byte UTF-8 sequence.
///
/// ```abnf
/// non-ascii = %x80-D7FF / %xE000-10FFFF
/// ```
///
/// The grammar rule is expressed in code points, but matching is done on bytes:
/// - ASCII is 0xxxxxxx
/// - First byte for UTF-8 is 11xxxxxx
/// - Subsequent UTF-8 bytes are 10xxxxxx
///
/// so every byte with the high bit set is accepted. This range does not by
/// itself check that the bytes form a well-formed UTF-8 sequence.
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff;
42 | |
43 | // non-eol = %x09 / %x20-7E / non-ascii |
44 | pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) = |
45 | (0x09, 0x20..=0x7E, NON_ASCII); |
46 | |
/// Byte that introduces a comment.
///
/// ```abnf
/// comment-start-symbol = %x23 ; #
/// ```
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';
49 | |
50 | // comment = comment-start-symbol *non-eol |
51 | pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> { |
52 | (COMMENT_START_SYMBOL, take_while(range:0.., NON_EOL)) |
53 | .recognize() |
54 | .parse_next(input) |
55 | } |
56 | |
57 | // newline = ( %x0A / ; LF |
58 | // %x0D.0A ) ; CRLF |
59 | pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> { |
60 | altimpl Parser, …>(( |
61 | one_of(LF).value(val:b' \n' ), |
62 | (one_of(CR), one_of(LF)).value(val:b' \n' ), |
63 | )) |
64 | .parse_next(input) |
65 | } |
/// Line feed (`%x0A`).
pub(crate) const LF: u8 = b'\n';
/// Carriage return (`%x0D`).
pub(crate) const CR: u8 = b'\r';
68 | |
69 | // ws-newline = *( wschar / newline ) |
70 | pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
71 | repeatMap, …, …, …, …, …>, …, …, …>, …, …, …, …, …>( |
72 | range:0.., |
73 | parser:alt((newline.value(&b" \n" [..]), take_while(range:1.., WSCHAR))), |
74 | ) |
75 | .map(|()| ()) |
76 | .recognize() |
77 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` and `newline` filters out on-ASCII" ) }) |
78 | .parse_next(input) |
79 | } |
80 | |
81 | // ws-newlines = newline *( wschar / newline ) |
82 | pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
83 | (newline, ws_newline) |
84 | .recognize() |
85 | .map(|b: &[u8]| unsafe { |
86 | from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` and `newline` filters out on-ASCII" ) |
87 | }) |
88 | .parse_next(input) |
89 | } |
90 | |
91 | // note: this rule is not present in the original grammar |
92 | // ws-comment-newline = *( ws-newline-nonempty / comment ) |
93 | pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> { |
94 | repeatRecognize( |
95 | range:0.., |
96 | parser:alt(( |
97 | repeatRepeat, …>, …, …, …, …>( |
98 | range:1.., |
99 | parser:alt((take_while(range:1.., WSCHAR), newline.value(&b" \n" [..]))), |
100 | ) |
101 | .map(|()| ()), |
102 | comment.value(()), |
103 | )), |
104 | ) |
105 | .map(|()| ()) |
106 | .recognize() |
107 | .parse_next(input) |
108 | } |
109 | |
110 | // note: this rule is not present in the original grammar |
111 | // line-ending = newline / eof |
112 | pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
113 | alt((newline.value(val:" \n" ), eof.value(val:"" ))).parse_next(input) |
114 | } |
115 | |
116 | // note: this rule is not present in the original grammar |
117 | // line-trailing = ws [comment] skip-line-ending |
118 | pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> { |
119 | terminated((ws, opt(comment)).span(), second:line_ending).parse_next(input) |
120 | } |
121 | |
#[cfg(test)]
mod test {
    use super::*;

    /// `ws_comment_newline` must accept every fixture in full and hand back
    /// exactly the bytes it was given.
    #[test]
    fn trivia() {
        // Whitespace is the thing under test, so each fixture spells it out
        // with escapes rather than relying on source-file layout.
        let inputs = [
            // empty input
            "",
            // a single space
            " ",
            // a single line ending
            "\n",
            // comments separated by blank lines
            "\n# comment\n\n# comment2\n\n\n",
            // line ending followed by trailing indentation
            "\n        ",
            // input that starts with a comment and ends in whitespace
            "# comment\n# comment2\n\n\n   ",
            // tab-indented comment terminated by CRLF
            "\t# comment\r\n",
        ];
        for input in inputs {
            let parsed = ws_comment_newline.parse(new_input(input));
            assert!(
                parsed.is_ok(),
                "input {:?} failed to parse: {:?}",
                input,
                parsed
            );
            assert_eq!(parsed.unwrap(), input.as_bytes(), "input {:?}", input);
        }
    }
}
157 | |