use std::cmp::Ordering;

/// Compares strings by treating internal integers as atomic units.
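///
/// A minimal usage sketch (kept out of the doctest run with `ignore`, since the
/// import path for `natural_cmp` depends on where this module sits in the crate):
///
/// ```ignore
/// use std::cmp::Ordering;
///
/// assert_eq!(natural_cmp("A<8>", "A<16>"), Ordering::Less);
/// assert_eq!(natural_cmp("A<16>", "A<16>"), Ordering::Equal);
/// assert_eq!(natural_cmp("B<4>", "A<4>"), Ordering::Greater);
/// ```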
pub fn natural_cmp(a: &str, b: &str) -> Ordering {
    Iterator::cmp(Tokenizer { input: a }, Tokenizer { input: b })
}

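/// Compares two runs of ASCII digits by numeric value, without parsing them into
/// integers (so arbitrarily long digit runs are handled).
///
/// A rough sketch of the expected results, mirroring the unit tests below:
///
/// ```text
/// cmp_int("4",  "8")  == Less
/// cmp_int("08", "4")  == Greater
/// cmp_int("0",  "00") == Equal
/// ```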
#[inline]
fn cmp_int(mut a: &str, mut b: &str) -> Ordering {
    a = a.trim_start_matches('0');
    b = b.trim_start_matches('0');

    // Compare to 0: with leading zeros stripped, an empty string means the value is 0.
    match (a.is_empty(), b.is_empty()) {
        (true, true) => return Ordering::Equal,
        (true, false) => return Ordering::Less,
        (false, true) => return Ordering::Greater,
        _ => {}
    }

    // Compare lengths: with no leading zeros left, more digits means a larger value.
    match a.len().cmp(&b.len()) {
        Ordering::Equal => {}
        ord => return ord,
    }

    // Compare digits: for equal lengths, lexicographic ASCII order matches numeric order.
    a.cmp(b)
}

/// A maximal run of either ASCII digits or non-digit characters.
#[derive(PartialEq, Eq)]
#[cfg_attr(test, derive(Debug))]
struct Token<'a> {
    /// Whether `text` consists solely of ASCII digits.
    is_int: bool,
    /// The slice of the original input covered by this token.
    text: &'a str,
}

impl PartialOrd for Token<'_> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Token<'_> {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        if self.is_int && other.is_int {
            // Two integer tokens compare by numeric value.
            cmp_int(self.text, other.text)
        } else {
            // Any other pairing falls back to plain lexicographic comparison.
            self.text.cmp(other.text)
        }
    }
}

/// Lexes a string into "tokens".
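///
/// Each token is a maximal run of ASCII digits or of non-digit characters.
/// A small sketch of the expected output, taken from the `tokenize` test below:
///
/// ```text
/// "A<4>"  =>  "A<" (text), "4" (int), ">" (text)
/// ```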
struct Tokenizer<'a> {
    /// The remaining characters to process.
    input: &'a str,
}

impl<'a> Iterator for Tokenizer<'a> {
    type Item = Token<'a>;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // The first byte decides the token's kind; an exhausted input ends the iteration.
        let mut bytes = self.input.bytes();
        let is_int = bytes.next()?.is_ascii_digit();

        let mut kind_len = 1;
        for byte in bytes {
            // Stop on character kind change.
            if byte.is_ascii_digit() != is_int {
                break;
            }

            kind_len += 1;
        }

        // SAFETY: `kind_len` never exceeds `self.input.len()`, and it always falls on
        // a character boundary: the loop stops only at a digit/non-digit transition,
        // and ASCII digits are one-byte characters that never appear as UTF-8
        // continuation bytes.
        unsafe {
            let text = self.input.get_unchecked(..kind_len);
            self.input = self.input.get_unchecked(kind_len..);

            Some(Token { is_int, text })
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[track_caller]
    fn test_sort(list: &[&str], cmp: fn(&str, &str) -> Ordering) {
        let mut copy = list.to_vec();
        copy.sort_by(|a, b| cmp(a, b));
        assert_eq!(list, copy);
    }

    #[test]
    fn natural_cmp() {
        #[track_caller]
        fn test(list: &[&str]) {
            test_sort(list, super::natural_cmp);
        }

        test(&["A<4>", "A<8>", "A<16>", "A<32>", "A<64>"]);
    }

    #[test]
    fn cmp_int() {
        #[track_caller]
        fn test(list: &[&str]) {
            test_sort(list, super::cmp_int);
        }

        test(&["4", "8", "16", "32", "64"]);
        test(&["4", "08"]);
        test(&["0", "00"]);
    }

    #[test]
    fn tokenize() {
        #[track_caller]
        fn test(s: &str, expected: &[Token]) {
            let tokens: Vec<Token> = Tokenizer { input: s }.collect();
            assert_eq!(tokens, expected);
        }

        test(
            "A<4>",
            &[
                Token { text: "A<", is_int: false },
                Token { text: "4", is_int: true },
                Token { text: ">", is_int: false },
            ],
        );
    }
}