| 1 | use std::ops::RangeInclusive; |
| 2 | |
| 3 | use winnow::combinator::alt; |
| 4 | use winnow::combinator::eof; |
| 5 | use winnow::combinator::opt; |
| 6 | use winnow::combinator::repeat; |
| 7 | use winnow::combinator::terminated; |
| 8 | use winnow::prelude::*; |
| 9 | use winnow::token::one_of; |
| 10 | use winnow::token::take_while; |
| 11 | |
| 12 | use crate::parser::prelude::*; |
| 13 | |
/// Reinterprets `bytes` as a `&str`, skipping UTF-8 validation in release builds.
///
/// When `debug_assertions` are enabled the bytes are validated anyway and an invalid
/// sequence panics with `safety_justification` as the message, so a broken invariant is
/// caught while testing instead of silently producing an invalid `str`.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the caller knows
/// that to be true; it is only used as the panic message of the debug-build check.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8 (see `# Safety`).
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
| 25 | |
// wschar = ( %x20 /              ; Space
//            %x09 )              ; Horizontal tab
/// The two bytes that count as horizontal whitespace: space and tab.
pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');
| 29 | |
| 30 | // ws = *wschar |
| 31 | pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
| 32 | take_whileMap, …, …>, …, …, …, …, …>(range:0.., WSCHAR) |
| 33 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` filters out on-ASCII" ) }) |
| 34 | .parse_next(input) |
| 35 | } |
| 36 | |
// non-ascii = %x80-D7FF / %xE000-10FFFF
// At the byte level this means "any byte with the high bit set":
// - ASCII is 0xxxxxxx
// - First byte for UTF-8 is 11xxxxxx
// - Subsequent UTF-8 bytes are 10xxxxxx
/// Every byte value from `0x80` up to and including `0xff`.
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;

// non-eol = %x09 / %x20-7E / non-ascii
/// Bytes allowed inside a comment: tab, the range space through `~`, and [`NON_ASCII`].
pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
    (b'\t', b' '..=b'~', NON_ASCII);

// comment-start-symbol = %x23 ; #
/// The byte that introduces a comment.
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';
| 49 | |
| 50 | // comment = comment-start-symbol *non-eol |
| 51 | pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> { |
| 52 | (COMMENT_START_SYMBOL, take_while(range:0.., NON_EOL)) |
| 53 | .recognize() |
| 54 | .parse_next(input) |
| 55 | } |
| 56 | |
| 57 | // newline = ( %x0A / ; LF |
| 58 | // %x0D.0A ) ; CRLF |
| 59 | pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> { |
| 60 | altimpl Parser, …, …>(( |
| 61 | one_of(LF).value(val:b' \n' ), |
| 62 | (one_of(CR), one_of(LF)).value(val:b' \n' ), |
| 63 | )) |
| 64 | .parse_next(input) |
| 65 | } |
/// Line feed (`%x0A`).
pub(crate) const LF: u8 = b'\n';
/// Carriage return (`%x0D`); only accepted by `newline` when immediately followed by `LF`.
pub(crate) const CR: u8 = b'\r';
| 68 | |
| 69 | // ws-newline = *( wschar / newline ) |
| 70 | pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
| 71 | repeatMap, …, …, …, …, …>, …, …, …>, …, …, …, …, …>( |
| 72 | range:0.., |
| 73 | parser:alt((newline.value(&b" \n" [..]), take_while(range:1.., WSCHAR))), |
| 74 | ) |
| 75 | .map(|()| ()) |
| 76 | .recognize() |
| 77 | .map(|b: &[u8]| unsafe { from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` and `newline` filters out on-ASCII" ) }) |
| 78 | .parse_next(input) |
| 79 | } |
| 80 | |
| 81 | // ws-newlines = newline *( wschar / newline ) |
| 82 | pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
| 83 | (newline, ws_newline) |
| 84 | .recognize() |
| 85 | .map(|b: &[u8]| unsafe { |
| 86 | from_utf8_unchecked(bytes:b, safety_justification:"`is_wschar` and `newline` filters out on-ASCII" ) |
| 87 | }) |
| 88 | .parse_next(input) |
| 89 | } |
| 90 | |
| 91 | // note: this rule is not present in the original grammar |
| 92 | // ws-comment-newline = *( ws-newline-nonempty / comment ) |
| 93 | pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> { |
| 94 | repeatRecognize( |
| 95 | range:0.., |
| 96 | parser:alt(( |
| 97 | repeatRepeat, …, …>, …, …, …, …>( |
| 98 | range:1.., |
| 99 | parser:alt((take_while(range:1.., WSCHAR), newline.value(&b" \n" [..]))), |
| 100 | ) |
| 101 | .map(|()| ()), |
| 102 | comment.value(()), |
| 103 | )), |
| 104 | ) |
| 105 | .map(|()| ()) |
| 106 | .recognize() |
| 107 | .parse_next(input) |
| 108 | } |
| 109 | |
| 110 | // note: this rule is not present in the original grammar |
| 111 | // line-ending = newline / eof |
| 112 | pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> { |
| 113 | alt((newline.value(val:" \n" ), eof.value(val:"" ))).parse_next(input) |
| 114 | } |
| 115 | |
| 116 | // note: this rule is not present in the original grammar |
| 117 | // line-trailing = ws [comment] skip-line-ending |
| 118 | pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> { |
| 119 | terminated((ws, opt(comment)).span(), second:line_ending).parse_next(input) |
| 120 | } |
| 121 | |
#[cfg(test)]
mod test {
    use super::*;

    /// Every input consists solely of whitespace, line breaks and comments, so
    /// `ws_comment_newline` must succeed and recognize the input in full.
    #[test]
    fn trivia() {
        let inputs = [
            "",
            r#" "#,
            r#"
"#,
            r#"
# comment

# comment2


"#,
            r#"
"#,
            r#"# comment
# comment2


"#,
        ];
        for input in inputs {
            dbg!(input);
            match ws_comment_newline.parse(new_input(input)) {
                // The whole input must come back untouched.
                Ok(recognized) => assert_eq!(recognized, input.as_bytes()),
                failure @ Err(_) => panic!("{:?}", failure),
            }
        }
    }
}
| 157 | |