1 | use std::cmp::Ordering; |
2 | |
3 | /// Compares strings by treating internal integers as atomic units. |
4 | pub fn natural_cmp(a: &str, b: &str) -> Ordering { |
5 | Iterator::cmp(self:Tokenizer { input: a }, other:Tokenizer { input: b }) |
6 | } |
7 | |
8 | #[inline ] |
9 | fn cmp_int(mut a: &str, mut b: &str) -> Ordering { |
10 | a = a.trim_start_matches('0' ); |
11 | b = b.trim_start_matches('0' ); |
12 | |
13 | // Compare to 0. |
14 | match (a.is_empty(), b.is_empty()) { |
15 | (true, true) => return Ordering::Equal, |
16 | (true, false) => return Ordering::Less, |
17 | (false, true) => return Ordering::Greater, |
18 | _ => {} |
19 | } |
20 | |
21 | // Compare length. |
22 | match a.len().cmp(&b.len()) { |
23 | Ordering::Equal => {} |
24 | ord: Ordering => return ord, |
25 | } |
26 | |
27 | // Compare digits. |
28 | a.cmp(b) |
29 | } |
30 | |
31 | #[derive (PartialEq, Eq)] |
32 | #[cfg_attr (test, derive(Debug))] |
33 | struct Token<'a> { |
34 | is_int: bool, |
35 | text: &'a str, |
36 | } |
37 | |
38 | impl PartialOrd for Token<'_> { |
39 | #[inline ] |
40 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
41 | Some(self.cmp(other)) |
42 | } |
43 | } |
44 | |
45 | impl Ord for Token<'_> { |
46 | #[inline ] |
47 | fn cmp(&self, other: &Self) -> Ordering { |
48 | if self.is_int && other.is_int { |
49 | cmp_int(self.text, b:other.text) |
50 | } else { |
51 | self.text.cmp(other.text) |
52 | } |
53 | } |
54 | } |
55 | |
56 | /// Lexes a string into "tokens". |
57 | struct Tokenizer<'a> { |
58 | /// The remaining characters to process. |
59 | input: &'a str, |
60 | } |
61 | |
62 | impl<'a> Iterator for Tokenizer<'a> { |
63 | type Item = Token<'a>; |
64 | |
65 | #[inline ] |
66 | fn next(&mut self) -> Option<Self::Item> { |
67 | let mut bytes = self.input.bytes(); |
68 | let is_int = bytes.next()?.is_ascii_digit(); |
69 | |
70 | let mut kind_len = 1; |
71 | for ch in bytes { |
72 | // Stop on character kind change. |
73 | if ch.is_ascii_digit() != is_int { |
74 | break; |
75 | } |
76 | |
77 | kind_len += 1; |
78 | } |
79 | |
80 | unsafe { |
81 | let text = self.input.get_unchecked(..kind_len); |
82 | self.input = self.input.get_unchecked(kind_len..); |
83 | |
84 | Some(Token { is_int, text }) |
85 | } |
86 | } |
87 | } |
88 | |
89 | #[cfg (test)] |
90 | mod tests { |
91 | use super::*; |
92 | |
93 | #[track_caller ] |
94 | fn test_sort(list: &[&str], cmp: fn(&str, &str) -> Ordering) { |
95 | let mut copy = list.to_vec(); |
96 | copy.sort_by(|a, b| cmp(a, b)); |
97 | assert_eq!(list, copy); |
98 | } |
99 | |
100 | #[test ] |
101 | fn natural_cmp() { |
102 | #[track_caller ] |
103 | fn test(list: &[&str]) { |
104 | test_sort(list, super::natural_cmp); |
105 | } |
106 | |
107 | test (&["A<4>" , "A<8>" , "A<16>" , "A<32>" , "A<64>" ]); |
108 | } |
109 | |
110 | #[test ] |
111 | fn cmp_int() { |
112 | #[track_caller ] |
113 | fn test(list: &[&str]) { |
114 | test_sort(list, super::cmp_int); |
115 | } |
116 | |
117 | test (&["4" , "8" , "16" , "32" , "64" ]); |
118 | test (&["4" , "08" ]); |
119 | test (&["0" , "00" ]); |
120 | } |
121 | |
122 | #[test ] |
123 | fn tokenize() { |
124 | #[track_caller ] |
125 | fn test(s: &str, expected: &[Token]) { |
126 | let tokens: Vec<Token> = Tokenizer { input: s }.collect(); |
127 | assert_eq!(tokens, expected); |
128 | } |
129 | |
130 | test ( |
131 | "A<4>" , |
132 | &[ |
133 | Token { text: "A<" , is_int: false }, |
134 | Token { text: "4" , is_int: true }, |
135 | Token { text: ">" , is_int: false }, |
136 | ], |
137 | ); |
138 | } |
139 | } |
140 | |