/// Splits `line` into words, treating only the ASCII space `' '` as a separator.
///
/// Every yielded slice is a run of non-space characters followed by all of
/// the spaces that trail it, so concatenating the yielded slices reproduces
/// `line` exactly. Spaces at the very start of the line have no preceding
/// word and are therefore yielded as a word of their own. Other whitespace
/// (tabs, newlines, non-breaking spaces, ...) is not a separator and stays
/// inside the surrounding word.
///
/// An empty `line` yields nothing. The iterator is lazy and keeps returning
/// `None` once it is exhausted.
pub(crate) fn find_words_ascii_space(line: &str) -> impl Iterator<Item = &'_ str> + '_ {
    // The part of `line` that has not been handed out yet.
    let mut remaining = line;

    std::iter::from_fn(move || {
        if remaining.is_empty() {
            return None;
        }

        // A word stops at the first non-space character that comes after the
        // first space. Without such a position (no space at all, or nothing
        // but spaces after it) the word runs to the end of the line.
        let boundary = match remaining.find(' ') {
            Some(first_space) => match remaining[first_space..].find(|c: char| c != ' ') {
                Some(offset) => first_space + offset,
                None => remaining.len(),
            },
            None => remaining.len(),
        };

        // `boundary` comes from `find` or `len`, so it is a char boundary.
        let (word, tail) = remaining.split_at(boundary);
        remaining = tail;
        Some(word)
    })
}
| 28 | |
#[cfg(test)]
mod tests {
    use super::*;

    // Generates one `#[test]` function named `$ascii_name`. Each
    // `[line, expected_words]` pair runs `find_words_ascii_space` on `line`
    // and compares the collected words with `expected_words`; the input line
    // is included in the failure message.
    macro_rules! test_find_words {
        ($ascii_name:ident,
         $([ $line:expr, $ascii_words:expr ]),+) => {
            #[test]
            fn $ascii_name() {
                $(
                    let expected_words: Vec<&str> = $ascii_words.to_vec();
                    let actual_words = find_words_ascii_space($line)
                        .collect::<Vec<_>>();
                    assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
                )+
            }
        };
    }

    test_find_words!(ascii_space_empty, ["", []]);

    test_find_words!(ascii_single_word, ["foo", ["foo"]]);

    test_find_words!(ascii_two_words, ["foo bar", ["foo ", "bar"]]);

    test_find_words!(
        ascii_multiple_words,
        ["foo bar", ["foo ", "bar"]],
        ["x y z", ["x ", "y ", "z"]]
    );

    // A line made only of spaces is a single word, whatever its length.
    test_find_words!(ascii_only_whitespace, [" ", [" "]], ["    ", ["    "]]);

    // A run of several spaces stays attached to the word before it.
    test_find_words!(
        ascii_inter_word_whitespace,
        ["foo   bar", ["foo   ", "bar"]]
    );

    test_find_words!(ascii_trailing_whitespace, ["foo   ", ["foo   "]]);

    // Leading spaces have no preceding word, so they form their own word.
    test_find_words!(ascii_leading_whitespace, ["   foo", ["   ", "foo"]]);

    test_find_words!(
        ascii_multi_column_char,
        ["\u{1f920}", ["\u{1f920}"]], // cowboy emoji 🤠
        // After a space the emoji starts a new word; slicing must respect
        // the multi-byte char boundary.
        [" \u{1f920}", [" ", "\u{1f920}"]]
    );

    test_find_words!(
        ascii_hyphens,
        ["foo-bar", ["foo-bar"]],
        ["foo- bar", ["foo- ", "bar"]],
        ["foo - bar", ["foo ", "- ", "bar"]],
        ["foo -bar", ["foo ", "-bar"]]
    );

    // Only ' ' separates words: a newline stays inside the word, but a space
    // in front of it still ends the previous word.
    test_find_words!(
        ascii_newline,
        ["foo\nbar", ["foo\nbar"]],
        ["foo \nbar", ["foo ", "\nbar"]]
    );

    test_find_words!(
        ascii_tab,
        ["foo\tbar", ["foo\tbar"]],
        ["foo \tbar", ["foo ", "\tbar"]]
    );

    test_find_words!(
        ascii_non_breaking_space,
        ["foo\u{00A0}bar", ["foo\u{00A0}bar"]],
        ["foo \u{00A0}bar", ["foo ", "\u{00A0}bar"]]
    );
}
| 93 | |