| 1 | #![allow ( |
| 2 | clippy::let_underscore_untyped, |
| 3 | clippy::manual_range_contains, |
| 4 | clippy::needless_pass_by_value, |
| 5 | clippy::type_complexity |
| 6 | )] |
| 7 | |
| 8 | #[path = "../src/tokens.rs" ] |
| 9 | #[allow (dead_code)] |
| 10 | mod tokens; |
| 11 | |
| 12 | use crate::tokens::{Error, Token, Tokenizer}; |
| 13 | use std::borrow::Cow; |
| 14 | |
| 15 | fn err(input: &str, err: Error) { |
| 16 | let mut t = Tokenizer::new(input); |
| 17 | let token = t.next().unwrap_err(); |
| 18 | assert_eq!(token, err); |
| 19 | assert!(t.next().unwrap().is_none()); |
| 20 | } |
| 21 | |
#[test]
fn literal_strings() {
    /// Tokenizes `input` and asserts it produces exactly one `Token::String`
    /// with the given unescaped `val` and `multiline` flag, followed by
    /// end-of-input.
    fn t(input: &str, val: &str, multiline: bool) {
        let mut t = Tokenizer::new(input);
        let (_, token) = t.next().unwrap().unwrap();
        assert_eq!(
            token,
            Token::String {
                src: input,
                val: Cow::Borrowed(val),
                multiline,
            }
        );
        assert!(t.next().unwrap().is_none());
    }

    t("''", "", false);
    t("''''''", "", true);
    // A newline immediately after the opening `'''` is trimmed.
    t("'''\n'''", "", true);
    t("'a'", "a", false);
    t("'\"a'", "\"a", false);
    t("''''a'''", "'a", true);
    t("'''\n'a\n'''", "'a\n", true);
    // Per the expected value, CRLF inside a multiline literal string is
    // normalized to LF.
    t("'''a\n'a\r\n'''", "a\n'a\n", true);
}
| 47 | |
#[test]
fn basic_strings() {
    /// Tokenizes `input` and asserts it produces exactly one `Token::String`
    /// with the given unescaped `val` and `multiline` flag, followed by
    /// end-of-input.
    fn t(input: &str, val: &str, multiline: bool) {
        let mut t = Tokenizer::new(input);
        let (_, token) = t.next().unwrap().unwrap();
        assert_eq!(
            token,
            Token::String {
                src: input,
                val: Cow::Borrowed(val),
                multiline,
            }
        );
        assert!(t.next().unwrap().is_none());
    }

    t(r#""""#, "", false);
    t(r#""""""""#, "", true);
    t(r#""a""#, "a", false);
    t(r#""""a""""#, "a", true);
    // Standard escapes are decoded.
    t(r#""\t""#, "\t", false);
    t(r#""\u0000""#, "\0", false);
    t(r#""\U00000000""#, "\0", false);
    t(r#""\U000A0000""#, "\u{A0000}", false);
    t(r#""\\t""#, "\\t", false);
    // A raw tab is allowed inside a basic string.
    t("\"\t\"", "\t", false);
    t("\"\"\"\n\t\"\"\"", "\t", true);
    // A trailing backslash at end of line eats the newline and all
    // following whitespace ("line-ending backslash").
    t("\"\"\"\\\n\"\"\"", "", true);
    t(
        "\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"",
        "",
        true,
    );
    t(r#""\r""#, "\r", false);
    t(r#""\n""#, "\n", false);
    t(r#""\b""#, "\u{8}", false);
    t(r#""a\fa""#, "a\u{c}a", false);
    t(r#""\"a""#, "\"a", false);
    t("\"\"\"\na\"\"\"", "a", true);
    t("\"\"\"\n\"\"\"", "", true);
    // An escaped quote directly before the closing delimiter.
    t(r#""""a\"""b""""#, "a\"\"\"b", true);
    err(r#""\a"#, Error::InvalidEscape(2, 'a'));
    // A backslash-newline is not a valid escape in a single-line string.
    err("\"\\\n", Error::InvalidEscape(2, '\n'));
    err("\"\\\r\n", Error::InvalidEscape(2, '\n'));
    err("\"\\", Error::UnterminatedString(0));
    err("\"\u{0}", Error::InvalidCharInString(1, '\u{0}'));
    err(r#""\U00""#, Error::InvalidHexEscape(5, '"'));
    err(r#""\U00"#, Error::UnterminatedString(0));
    // Surrogate and out-of-range code points are rejected.
    err(r#""\uD800"#, Error::InvalidEscapeValue(2, 0xd800));
    err(r#""\UFFFFFFFF"#, Error::InvalidEscapeValue(2, 0xffff_ffff));
}
| 99 | |
#[test]
fn keylike() {
    /// Checks that `input` lexes as a single `Keylike` token spanning the
    /// whole string, followed by end-of-input.
    fn t(input: &str) {
        let mut tokenizer = Tokenizer::new(input);
        let (_, token) = tokenizer.next().unwrap().unwrap();
        assert_eq!(token, Token::Keylike(input));
        assert!(tokenizer.next().unwrap().is_none());
    }

    // Letters, digits, `-`, and `_` in any combination are keylike.
    for case in ["foo", "0bar", "bar0", "1234", "a-b", "a_B", "-_-", "___"] {
        t(case);
    }
}
| 117 | |
#[test]
fn all() {
    /// Tokenizes `input` and asserts the full token stream matches
    /// `expected` as `((start, end), token, source-slice)` triples.
    fn t(input: &str, expected: &[((usize, usize), Token, &str)]) {
        let mut tokens = Tokenizer::new(input);
        let mut actual: Vec<((usize, usize), Token, &str)> = Vec::new();
        while let Some((span, token)) = tokens.next().unwrap() {
            actual.push((span.into(), token, &input[span.start..span.end]));
        }
        // Compare element-wise first for readable failures, then lengths.
        for (a, b) in actual.iter().zip(expected) {
            assert_eq!(a, b);
        }
        assert_eq!(actual.len(), expected.len());
    }

    t(
        " a ",
        &[
            ((0, 1), Token::Whitespace(" "), " "),
            ((1, 2), Token::Keylike("a"), "a"),
            ((2, 3), Token::Whitespace(" "), " "),
        ],
    );

    // Input characters (by offset): 0 ' ', 1 'a', 2 '\t', 3 ' ', 4-7 "[[]]",
    // 8-10 " \t ", 11-12 "[]", 13 ' ', 14-15 "{}", 16 ' ', 17 ',', 18 ' ',
    // 19 '.', 20 ' ', 21 '=', 22 '\n', 23-28 "# foo ", 29-30 "\r\n",
    // 31-35 "#foo ", 36 '\n', 37 ' '.
    t(
        " a\t [[]] \t [] {} , . =\n# foo \r\n#foo \n ",
        &[
            ((0, 1), Token::Whitespace(" "), " "),
            ((1, 2), Token::Keylike("a"), "a"),
            ((2, 4), Token::Whitespace("\t "), "\t "),
            ((4, 5), Token::LeftBracket, "["),
            ((5, 6), Token::LeftBracket, "["),
            ((6, 7), Token::RightBracket, "]"),
            ((7, 8), Token::RightBracket, "]"),
            ((8, 11), Token::Whitespace(" \t "), " \t "),
            ((11, 12), Token::LeftBracket, "["),
            ((12, 13), Token::RightBracket, "]"),
            ((13, 14), Token::Whitespace(" "), " "),
            ((14, 15), Token::LeftBrace, "{"),
            ((15, 16), Token::RightBrace, "}"),
            ((16, 17), Token::Whitespace(" "), " "),
            ((17, 18), Token::Comma, ","),
            ((18, 19), Token::Whitespace(" "), " "),
            ((19, 20), Token::Period, "."),
            ((20, 21), Token::Whitespace(" "), " "),
            ((21, 22), Token::Equals, "="),
            ((22, 23), Token::Newline, "\n"),
            ((23, 29), Token::Comment("# foo "), "# foo "),
            ((29, 31), Token::Newline, "\r\n"),
            ((31, 36), Token::Comment("#foo "), "#foo "),
            ((36, 37), Token::Newline, "\n"),
            ((37, 38), Token::Whitespace(" "), " "),
        ],
    );
}
| 172 | |
#[test]
fn bare_cr_bad() {
    // A carriage return not followed by a line feed is rejected outright.
    err("\r", Error::Unexpected(0, '\r'));
    // Single-line literal strings may not contain raw newlines or NUL.
    err("'\n", Error::NewlineInString(1));
    err("'\u{0}", Error::InvalidCharInString(1, '\u{0}'));
    err("'", Error::UnterminatedString(0));
    // A bare NUL outside any string is also rejected.
    err("\u{0}", Error::Unexpected(0, '\u{0}'));
}
| 181 | |
#[test]
fn bad_comment() {
    // Comments may not contain control characters: the comment token up to
    // the NUL is produced first, then the NUL at offset 1 is reported as
    // unexpected, after which the tokenizer reaches clean end-of-input.
    let mut t = Tokenizer::new("#\u{0}");
    t.next().unwrap().unwrap();
    assert_eq!(t.next(), Err(Error::Unexpected(1, '\u{0}')));
    assert!(t.next().unwrap().is_none());
}
| 189 | |