| 1 | use std::ops::RangeInclusive; |
| 2 | |
| 3 | use winnow::combinator::alt; |
| 4 | use winnow::combinator::empty; |
| 5 | use winnow::combinator::eof; |
| 6 | use winnow::combinator::fail; |
| 7 | use winnow::combinator::opt; |
| 8 | use winnow::combinator::peek; |
| 9 | use winnow::combinator::repeat; |
| 10 | use winnow::combinator::terminated; |
| 11 | use winnow::prelude::*; |
| 12 | use winnow::token::any; |
| 13 | use winnow::token::one_of; |
| 14 | use winnow::token::take_while; |
| 15 | |
| 16 | use crate::parser::prelude::*; |
| 17 | |
/// Converts `bytes` to a `&str` without validating it in release builds.
///
/// When `debug_assertions` are enabled the bytes are validated anyway, so a broken invariant
/// is caught early during testing instead of producing an invalid `str`.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the caller knows that.
///
/// # Panics
///
/// With `debug_assertions` enabled, panics with `safety_justification` as the message if
/// `bytes` is not valid UTF-8.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8 (see `# Safety`).
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
| 31 | |
// wschar = ( %x20 /              ; Space
//            %x09 )              ; Horizontal tab
/// The two bytes accepted as horizontal whitespace: space (`0x20`) and tab (`0x09`).
pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');
| 35 | |
| 36 | // ws = *wschar |
| 37 | pub(crate) fn ws<'i>(input: &mut Input<'i>) -> ModalResult<&'i str> { |
| 38 | take_whileMap, …, …>, …, …, …, …, …>(occurrences:0.., WSCHAR) |
| 39 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` filters out on-ASCII" ) }) |
| 40 | .parse_next(input) |
| 41 | } |
| 42 | |
// non-ascii = %x80-D7FF / %xE000-10FFFF
// - ASCII is 0xxxxxxx
// - First byte for UTF-8 is 11xxxxxx
// - Subsequent UTF-8 bytes are 10xxxxxx
/// Byte-level stand-in for the `non-ascii` rule: every byte with the high bit set.
///
/// This matches individual bytes (`0x80..=0xff`), not decoded code points, so it does not by
/// itself check that the bytes form well-formed UTF-8.
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff;
| 48 | |
| 49 | // non-eol = %x09 / %x20-7E / non-ascii |
| 50 | pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) = |
| 51 | (0x09, 0x20..=0x7E, NON_ASCII); |
| 52 | |
// comment-start-symbol = %x23 ; #
/// The byte that introduces a comment.
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';
| 55 | |
| 56 | // comment = comment-start-symbol *non-eol |
| 57 | pub(crate) fn comment(input: &mut Input<'_>) -> ModalResult<()> { |
| 58 | (COMMENT_START_SYMBOL, take_while(occurrences:0.., NON_EOL)) |
| 59 | .void() |
| 60 | .parse_next(input) |
| 61 | } |
| 62 | |
| 63 | // newline = ( %x0A / ; LF |
| 64 | // %x0D.0A ) ; CRLF |
| 65 | pub(crate) fn newline(input: &mut Input<'_>) -> ModalResult<()> { |
| 66 | dispatchimpl Parser, …>, …, …>! {any; |
| 67 | b' \n' => empty, |
| 68 | b' \r' => one_of(LF).void(), |
| 69 | _ => fail, |
| 70 | } |
| 71 | .parse_next(input) |
| 72 | } |
/// Line feed (`0x0A`).
pub(crate) const LF: u8 = b'\n';
/// Carriage return (`0x0D`).
pub(crate) const CR: u8 = b'\r';
| 75 | |
| 76 | // ws-newline = *( wschar / newline ) |
| 77 | pub(crate) fn ws_newline(input: &mut Input<'_>) -> ModalResult<()> { |
| 78 | repeatMap, …, …>, …, …, …, …>, …, …, …, …, …>( |
| 79 | occurrences:0.., |
| 80 | parser:alt((newline.value(&b" \n" [..]), take_while(occurrences:1.., WSCHAR))), |
| 81 | ) |
| 82 | .map(|()| ()) |
| 83 | .parse_next(input) |
| 84 | } |
| 85 | |
| 86 | // ws-newlines = newline *( wschar / newline ) |
| 87 | pub(crate) fn ws_newlines(input: &mut Input<'_>) -> ModalResult<()> { |
| 88 | (newline, ws_newline).void().parse_next(input) |
| 89 | } |
| 90 | |
| 91 | // note: this rule is not present in the original grammar |
| 92 | // ws-comment-newline = *( ws-newline-nonempty / comment ) |
| 93 | pub(crate) fn ws_comment_newline(input: &mut Input<'_>) -> ModalResult<()> { |
| 94 | let mut start: Checkpoint, …>, …> = input.checkpoint(); |
| 95 | loop { |
| 96 | let _ = ws.parse_next(input)?; |
| 97 | |
| 98 | let next_token: Option = opt(parser:peek(parser:any)).parse_next(input)?; |
| 99 | match next_token { |
| 100 | Some(b'#' ) => (comment, newline).void().parse_next(input)?, |
| 101 | Some(b' \n' ) => (newline).void().parse_next(input)?, |
| 102 | Some(b' \r' ) => (newline).void().parse_next(input)?, |
| 103 | _ => break, |
| 104 | } |
| 105 | |
| 106 | let end: Checkpoint, …>, …> = input.checkpoint(); |
| 107 | if start == end { |
| 108 | break; |
| 109 | } |
| 110 | start = end; |
| 111 | } |
| 112 | |
| 113 | Ok(()) |
| 114 | } |
| 115 | |
| 116 | // note: this rule is not present in the original grammar |
| 117 | // line-ending = newline / eof |
| 118 | pub(crate) fn line_ending(input: &mut Input<'_>) -> ModalResult<()> { |
| 119 | altVoid, …, …>, …, …, …>((newline.value(val:" \n" ), eof.value(val:"" ))) |
| 120 | .void() |
| 121 | .parse_next(input) |
| 122 | } |
| 123 | |
| 124 | // note: this rule is not present in the original grammar |
| 125 | // line-trailing = ws [comment] skip-line-ending |
| 126 | pub(crate) fn line_trailing(input: &mut Input<'_>) -> ModalResult<std::ops::Range<usize>> { |
| 127 | terminated((ws, opt(comment)).span(), ignored:line_ending).parse_next(input) |
| 128 | } |
| 129 | |
#[cfg(test)]
#[cfg(feature = "parse")]
#[cfg(feature = "display")]
mod test {
    use super::*;

    /// `ws_comment_newline` must accept every input below and consume it completely.
    #[test]
    fn trivia() {
        // Inputs are written with escapes so that the whitespace under test is visible.
        // NOTE(review): the trailing indentation in the fifth and sixth entries was chosen to
        // cover "whitespace after the last line ending"; confirm against the intended cases.
        let inputs = [
            "",
            " ",
            "\n",
            "\n# comment\n\n# comment2\n\n\n",
            "\n        ",
            "# comment\n# comment2\n\n\n   ",
            // Tab before a comment, and CRLF line endings.
            "\t# comment\r\n\r\n",
        ];
        for input in inputs {
            let parsed = ws_comment_newline.take().parse(new_input(input));
            assert!(parsed.is_ok(), "input {input:?}: {parsed:?}");
            let parsed = parsed.unwrap();
            assert_eq!(parsed, input.as_bytes(), "input {input:?}");
        }
    }
}
| 167 | |