/// Splits `line` into words using only the ASCII space (`' '`) as separator.
///
/// Each yielded slice is a (possibly empty) run of non-space characters
/// together with the run of spaces that follows it, so concatenating the
/// yielded slices gives back `line`. Leading spaces therefore come out as a
/// word of their own. Characters other than `' '` (tabs, newlines,
/// non-breaking spaces, ...) are never treated as separators. An empty
/// `line` yields nothing.
pub(crate) fn find_words_ascii_space(line: &str) -> impl Iterator<Item = &'_ str> + '_ {
    // The part of `line` that has not been handed out yet.
    let mut rest = line;

    std::iter::from_fn(move || {
        if rest.is_empty() {
            return None;
        }

        // A word stops right after the first run of spaces; when there is no
        // space left, the whole remainder is the final word.
        let split = match rest.find(' ') {
            Some(first_space) => {
                let from_space = &rest[first_space..];
                let space_run = from_space.len() - from_space.trim_start_matches(' ').len();
                first_space + space_run
            }
            None => rest.len(),
        };

        // `split` always lands on a char boundary: it is either the end of
        // the string or directly after an ASCII space.
        let (word, tail) = rest.split_at(split);
        rest = tail;
        Some(word)
    })
}
28 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Generates one `#[test]` function named `$ascii_name`.
    ///
    /// For every `[line, expected]` pair the generated test runs
    /// `find_words_ascii_space` on `line`, collects the result and compares
    /// it with `expected`; the offending line is included in the failure
    /// message.
    macro_rules! test_find_words {
        ($ascii_name:ident,
         $([ $line:expr, $ascii_words:expr ]),+) => {
            #[test]
            fn $ascii_name() {
                $(
                    let expected_words: Vec<&str> = $ascii_words.to_vec();
                    let actual_words = find_words_ascii_space($line)
                        .collect::<Vec<_>>();
                    assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
                )+
            }
        };
    }

    test_find_words!(ascii_space_empty, ["", []]);

    test_find_words!(ascii_single_word, ["foo", ["foo"]]);

    test_find_words!(ascii_two_words, ["foo bar", ["foo ", "bar"]]);

    test_find_words!(
        ascii_multiple_words,
        ["foo bar", ["foo ", "bar"]],
        ["x y z", ["x ", "y ", "z"]]
    );

    // A single space and a longer run must both come out as one word.
    test_find_words!(ascii_only_whitespace, [" ", [" "]], ["    ", ["    "]]);

    // The whole run of spaces between two words stays attached to the first.
    test_find_words!(
        ascii_inter_word_whitespace,
        ["foo   bar", ["foo   ", "bar"]]
    );

    test_find_words!(ascii_trailing_whitespace, ["foo   ", ["foo   "]]);

    // Leading spaces form a word of their own.
    test_find_words!(ascii_leading_whitespace, ["  foo", ["  ", "foo"]]);

    test_find_words!(
        ascii_multi_column_char,
        ["\u{1f920}", ["\u{1f920}"]] // cowboy emoji 🤠
    );

    test_find_words!(
        ascii_hyphens,
        ["foo-bar", ["foo-bar"]],
        ["foo- bar", ["foo- ", "bar"]],
        ["foo - bar", ["foo ", "- ", "bar"]],
        ["foo -bar", ["foo ", "-bar"]]
    );

    // Only ' ' separates words: newline, tab and non-breaking space do not,
    // so the inputs below must not contain an ASCII space.
    test_find_words!(ascii_newline, ["foo\nbar", ["foo\nbar"]]);

    test_find_words!(ascii_tab, ["foo\tbar", ["foo\tbar"]]);

    test_find_words!(
        ascii_non_breaking_space,
        ["foo\u{00A0}bar", ["foo\u{00A0}bar"]]
    );
}
93 | |