1 | use std::ops::RangeInclusive; |
2 | |
3 | use winnow::combinator::alt; |
4 | use winnow::combinator::empty; |
5 | use winnow::combinator::eof; |
6 | use winnow::combinator::fail; |
7 | use winnow::combinator::opt; |
8 | use winnow::combinator::peek; |
9 | use winnow::combinator::repeat; |
10 | use winnow::combinator::terminated; |
11 | use winnow::prelude::*; |
12 | use winnow::token::any; |
13 | use winnow::token::one_of; |
14 | use winnow::token::take_while; |
15 | |
16 | use crate::parser::prelude::*; |
17 | |
/// Reinterprets `bytes` as a `&str`, skipping UTF-8 validation in release builds.
///
/// When `debug_assertions` are enabled the bytes are validated with
/// [`std::str::from_utf8`] so that a broken invariant is caught early; a
/// failure panics with `safety_justification` as the message. Otherwise the
/// bytes are converted with [`std::str::from_utf8_unchecked`].
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the
/// caller knows that to be true.
///
/// # Panics
///
/// Panics in builds with `debug_assertions` if `bytes` is not valid UTF-8.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8 (see `# Safety`).
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
31 | |
/// The two bytes accepted as intra-line whitespace: space and horizontal tab.
// wschar = ( %x20 /              ; Space
//            %x09 )              ; Horizontal tab
pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');
35 | |
36 | // ws = *wschar |
37 | pub(crate) fn ws<'i>(input: &mut Input<'i>) -> ModalResult<&'i str> { |
38 | take_whileMap, …, …>, …, …, …, …, …>(occurrences:0.., WSCHAR) |
39 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` filters out on-ASCII" ) }) |
40 | .parse_next(input) |
41 | } |
42 | |
/// Every byte that can occur inside a multi-byte UTF-8 sequence.
///
/// The grammar is expressed in code points, but matching happens on raw bytes,
/// so any byte with the high bit set is accepted here.
// non-ascii = %x80-D7FF / %xE000-10FFFF
// - ASCII is 0xxxxxxx
// - First byte for UTF-8 is 11xxxxxx
// - Subsequent UTF-8 bytes are 10xxxxxx
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff;

/// Bytes allowed inside a line: horizontal tab, printable ASCII, and [`NON_ASCII`].
// non-eol = %x09 / %x20-7E / non-ascii
pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
    (0x09, 0x20..=0x7E, NON_ASCII);
52 | |
/// The byte that introduces a comment.
// comment-start-symbol = %x23 ; #
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';
55 | |
56 | // comment = comment-start-symbol *non-eol |
57 | pub(crate) fn comment(input: &mut Input<'_>) -> ModalResult<()> { |
58 | (COMMENT_START_SYMBOL, take_while(occurrences:0.., NON_EOL)) |
59 | .void() |
60 | .parse_next(input) |
61 | } |
62 | |
63 | // newline = ( %x0A / ; LF |
64 | // %x0D.0A ) ; CRLF |
65 | pub(crate) fn newline(input: &mut Input<'_>) -> ModalResult<()> { |
66 | dispatchimpl Parser, …>, …, …>! {any; |
67 | b' \n' => empty, |
68 | b' \r' => one_of(LF).void(), |
69 | _ => fail, |
70 | } |
71 | .parse_next(input) |
72 | } |
/// Line feed (`%x0A`).
pub(crate) const LF: u8 = b'\n';
/// Carriage return (`%x0D`).
pub(crate) const CR: u8 = b'\r';
75 | |
76 | // ws-newline = *( wschar / newline ) |
77 | pub(crate) fn ws_newline(input: &mut Input<'_>) -> ModalResult<()> { |
78 | repeatMap, …, …>, …, …, …, …>, …, …, …, …, …>( |
79 | occurrences:0.., |
80 | parser:alt((newline.value(&b" \n" [..]), take_while(occurrences:1.., WSCHAR))), |
81 | ) |
82 | .map(|()| ()) |
83 | .parse_next(input) |
84 | } |
85 | |
86 | // ws-newlines = newline *( wschar / newline ) |
87 | pub(crate) fn ws_newlines(input: &mut Input<'_>) -> ModalResult<()> { |
88 | (newline, ws_newline).void().parse_next(input) |
89 | } |
90 | |
91 | // note: this rule is not present in the original grammar |
92 | // ws-comment-newline = *( ws-newline-nonempty / comment ) |
93 | pub(crate) fn ws_comment_newline(input: &mut Input<'_>) -> ModalResult<()> { |
94 | let mut start: Checkpoint, …>, …> = input.checkpoint(); |
95 | loop { |
96 | let _ = ws.parse_next(input)?; |
97 | |
98 | let next_token: Option = opt(parser:peek(parser:any)).parse_next(input)?; |
99 | match next_token { |
100 | Some(b'#' ) => (comment, newline).void().parse_next(input)?, |
101 | Some(b' \n' ) => (newline).void().parse_next(input)?, |
102 | Some(b' \r' ) => (newline).void().parse_next(input)?, |
103 | _ => break, |
104 | } |
105 | |
106 | let end: Checkpoint, …>, …> = input.checkpoint(); |
107 | if start == end { |
108 | break; |
109 | } |
110 | start = end; |
111 | } |
112 | |
113 | Ok(()) |
114 | } |
115 | |
116 | // note: this rule is not present in the original grammar |
117 | // line-ending = newline / eof |
118 | pub(crate) fn line_ending(input: &mut Input<'_>) -> ModalResult<()> { |
119 | altVoid, …, …>, …, …, …>((newline.value(val:" \n" ), eof.value(val:"" ))) |
120 | .void() |
121 | .parse_next(input) |
122 | } |
123 | |
124 | // note: this rule is not present in the original grammar |
125 | // line-trailing = ws [comment] skip-line-ending |
126 | pub(crate) fn line_trailing(input: &mut Input<'_>) -> ModalResult<std::ops::Range<usize>> { |
127 | terminated((ws, opt(comment)).span(), ignored:line_ending).parse_next(input) |
128 | } |
129 | |
#[cfg(test)]
#[cfg(feature = "parse")]
#[cfg(feature = "display")]
mod test {
    use super::*;

    /// `ws_comment_newline` must consume every byte of inputs that consist only
    /// of whitespace, line breaks and newline-terminated comments.
    #[test]
    fn trivia() {
        // Whitespace is spelled with escapes so that it stays visible and
        // survives editors that trim or re-indent lines.
        let inputs = [
            "",
            " ",
            "\n",
            "\n# comment\n\n# comment2\n\n\n",
            "\n        ",
            "# comment\n# comment2\n\n\n   ",
        ];
        for input in inputs {
            let parsed = ws_comment_newline.take().parse(new_input(input));
            assert!(parsed.is_ok(), "input {input:?}: {parsed:?}");
            let parsed = parsed.unwrap();
            assert_eq!(parsed, input.as_bytes(), "input {input:?}");
        }
    }
}
167 | |