1 | #[cfg (__unicase__iter_cmp)] |
2 | use core::cmp::Ordering; |
3 | use core::hash::{Hash, Hasher}; |
4 | |
5 | use self::map::lookup; |
6 | mod map; |
7 | |
/// Wrapper that marks the contained string-like value `S` for
/// case-insensitive handling.
///
/// The wrapper itself stores the value untouched; the trait implementations
/// on it are what run each `char` through `map::lookup` before comparing or
/// hashing.
#[derive(Debug, Default, Clone, Copy)]
pub struct Unicode<S>(pub S);
10 | |
11 | impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> { |
12 | #[inline ] |
13 | fn eq(&self, other: &Unicode<S2>) -> bool { |
14 | let mut left: impl Iterator = self.0.as_ref().chars().flat_map(lookup); |
15 | let mut right: impl Iterator = other.0.as_ref().chars().flat_map(lookup); |
16 | |
17 | // inline Iterator::eq since not added until Rust 1.5 |
18 | loop { |
19 | let x: char = match left.next() { |
20 | None => return right.next().is_none(), |
21 | Some(val: char) => val, |
22 | }; |
23 | |
24 | let y: char = match right.next() { |
25 | None => return false, |
26 | Some(val: char) => val, |
27 | }; |
28 | |
29 | if x != y { |
30 | return false; |
31 | } |
32 | } |
33 | } |
34 | } |
35 | |
36 | impl<S: AsRef<str>> Eq for Unicode<S> {} |
37 | |
/// Partial ordering for `Unicode`; always `Some`, because it simply defers
/// to the total order provided by the `Ord` implementation.
#[cfg(__unicase__iter_cmp)]
impl<T> PartialOrd for Unicode<T>
where
    T: AsRef<str>,
{
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
45 | |
/// Total ordering for `Unicode`.
///
/// Each side is expanded `char` by `char` through `map::lookup` and the two
/// resulting sequences are compared lexicographically with `Iterator::cmp`.
/// Only compiled when the `__unicase__iter_cmp` cfg is set.
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> Ord for Unicode<T> {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let self_chars = self.0.as_ref().chars().flat_map(lookup);
        let other_chars = other.0.as_ref().chars().flat_map(lookup);
        self_chars.cmp(other_chars)
    }
}
55 | |
56 | impl<S: AsRef<str>> Hash for Unicode<S> { |
57 | #[inline ] |
58 | fn hash<H: Hasher>(&self, hasher: &mut H) { |
59 | let mut buf: [u8; 4] = [0; 4]; |
60 | for c: char in self.0.as_ref().chars().flat_map(|c: char| lookup(orig:c)) { |
61 | let len: usize = char_to_utf8(c, &mut buf); |
62 | hasher.write(&buf[..len]) |
63 | } |
64 | } |
65 | } |
66 | |
/// Encodes `c` as UTF-8 into the front of `dst`.
///
/// Returns the number of bytes written (1 to 4). Bytes of `dst` beyond the
/// returned length are left as they were.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Tag bits for the leading byte of 2-, 3- and 4-byte sequences, and for
    // every continuation byte.
    const LEAD_2: u8 = 0b1100_0000;
    const LEAD_3: u8 = 0b1110_0000;
    const LEAD_4: u8 = 0b1111_0000;
    const CONT: u8 = 0b1000_0000;

    let scalar = c as u32;
    // Six payload bits of `scalar` starting at `shift`, tagged as a
    // continuation byte.
    let cont = |shift: u32| ((scalar >> shift) & 0x3F) as u8 | CONT;

    if scalar < 0x80 {
        dst[0] = scalar as u8;
        return 1;
    }
    if scalar < 0x800 {
        dst[0] = ((scalar >> 6) & 0x1F) as u8 | LEAD_2;
        dst[1] = cont(0);
        return 2;
    }
    if scalar < 0x1_0000 {
        dst[0] = ((scalar >> 12) & 0x0F) as u8 | LEAD_3;
        dst[1] = cont(6);
        dst[2] = cont(0);
        return 3;
    }

    dst[0] = ((scalar >> 18) & 0x07) as u8 | LEAD_4;
    dst[1] = cont(12);
    dst[2] = cont(6);
    dst[3] = cont(0);
    4
}
95 | |
// Internal module so that the enum can be `pub` (needed to satisfy the
// privacy checker) without being exposed at this module's top level.
mod fold {
    /// A short sequence of zero to three `char`s that is consumed as an
    /// iterator.
    ///
    /// Iteration yields the stored `char`s front to back (`one`, then `two`,
    /// then `three`), shrinking the value by one variant per step until it
    /// reaches `Zero`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Returns the first remaining `char` and keeps the rest, in order.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Yield the front item and keep the tail in its original
                    // order; emitting `three` first would scramble the
                    // sequence relative to the same chars produced one by one.
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact number of `char`s still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    /// Builds a one-`char` fold from a 1-tuple.
    impl From<(char,)> for Fold {
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    /// Builds a two-`char` fold from a pair.
    impl From<(char, char)> for Fold {
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    /// Builds a three-`char` fold from a triple.
    impl From<(char, char, char)> for Fold {
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
159 | |
#[cfg(test)]
mod tests {
    use super::Unicode;

    // Asserts that two strings compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => {{
            assert_eq!(Unicode($left), Unicode($right));
        }};
    }

    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    #[test]
    fn test_full_case_folding() {
        // U+FB02 LATIN SMALL LIGATURE FL, written as an escape so that text
        // normalization cannot silently collapse it into plain "fl" and turn
        // this into a comparison of two identical strings.
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}
201 | |