1#[cfg(__unicase__iter_cmp)]
2use core::cmp::Ordering;
3use core::hash::{Hash, Hasher};
4
5use self::map::lookup;
6mod map;
7
/// Wrapper around a string-like value whose comparisons and hashing are
/// performed on the `lookup`-expanded characters of the wrapped string
/// rather than on its raw bytes.
///
/// The wrapped value is public and can be read back through `.0`.
#[derive(Clone, Copy, Debug, Default)]
pub struct Unicode<S>(pub S);
10
11impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
12 #[inline]
13 fn eq(&self, other: &Unicode<S2>) -> bool {
14 let mut left: impl Iterator = self.0.as_ref().chars().flat_map(lookup);
15 let mut right: impl Iterator = other.0.as_ref().chars().flat_map(lookup);
16
17 // inline Iterator::eq since not added until Rust 1.5
18 loop {
19 let x: char = match left.next() {
20 None => return right.next().is_none(),
21 Some(val: char) => val,
22 };
23
24 let y: char = match right.next() {
25 None => return false,
26 Some(val: char) => val,
27 };
28
29 if x != y {
30 return false;
31 }
32 }
33 }
34}
35
36impl<S: AsRef<str>> Eq for Unicode<S> {}
37
/// Partial ordering that always succeeds by delegating to the total order
/// defined by the `Ord` impl.
///
/// Only compiled when the `__unicase__iter_cmp` cfg flag is set.
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> PartialOrd for Unicode<T> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
45
/// Total ordering: lexicographic comparison of the two strings' chars after
/// each char has been expanded through `lookup`.
///
/// Only compiled when the `__unicase__iter_cmp` cfg flag is set, because it
/// relies on `Iterator::cmp`.
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> Ord for Unicode<T> {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let self_chars = self.0.as_ref().chars().flat_map(lookup);
        let other_chars = other.0.as_ref().chars().flat_map(lookup);
        self_chars.cmp(other_chars)
    }
}
55
56impl<S: AsRef<str>> Hash for Unicode<S> {
57 #[inline]
58 fn hash<H: Hasher>(&self, hasher: &mut H) {
59 let mut buf: [u8; 4] = [0; 4];
60 for c: char in self.0.as_ref().chars().flat_map(|c: char| lookup(orig:c)) {
61 let len: usize = char_to_utf8(c, &mut buf);
62 hasher.write(&buf[..len])
63 }
64 }
65}
66
/// Encodes `c` as UTF-8 into the front of `dst` and returns how many bytes
/// were written (1 to 4).
///
/// Only the first `len` bytes of `dst` are overwritten; the rest keep
/// whatever they held before the call.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Leading-bit tags for continuation bytes and for the first byte of a
    // two-, three- and four-byte sequence.
    const TAG_CONT: u8 = 0b1000_0000;
    const TAG_TWO_B: u8 = 0b1100_0000;
    const TAG_THREE_B: u8 = 0b1110_0000;
    const TAG_FOUR_B: u8 = 0b1111_0000;

    let code = c as u32;
    if code <= 0x7F {
        // ASCII: the code point is the byte.
        dst[0] = code as u8;
        1
    } else if code <= 0x7FF {
        // 5 payload bits in the lead byte, 6 in the continuation byte.
        dst[0] = ((code >> 6) & 0x1F) as u8 | TAG_TWO_B;
        dst[1] = (code & 0x3F) as u8 | TAG_CONT;
        2
    } else if code <= 0xFFFF {
        // 4 payload bits in the lead byte, then two 6-bit continuations.
        dst[0] = ((code >> 12) & 0x0F) as u8 | TAG_THREE_B;
        dst[1] = ((code >> 6) & 0x3F) as u8 | TAG_CONT;
        dst[2] = (code & 0x3F) as u8 | TAG_CONT;
        3
    } else {
        // 3 payload bits in the lead byte, then three 6-bit continuations.
        dst[0] = ((code >> 18) & 0x07) as u8 | TAG_FOUR_B;
        dst[1] = ((code >> 12) & 0x3F) as u8 | TAG_CONT;
        dst[2] = ((code >> 6) & 0x3F) as u8 | TAG_CONT;
        dst[3] = (code & 0x3F) as u8 | TAG_CONT;
        4
    }
}
95
// internal mod so that the enum can be 'pub'
// thanks privacy-checker :___(
mod fold {
    /// A short, fixed-capacity sequence of zero to three chars that is its
    /// own iterator.
    ///
    /// Iterating yields the stored chars in the order they appear in the
    /// variant, left to right, shrinking the value by one variant per step
    /// until it reaches `Zero`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        /// Nothing left to yield.
        Zero,
        /// One char remaining.
        One(char),
        /// Two chars remaining, yielded first to second.
        Two(char, char),
        /// Three chars remaining, yielded first to third.
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Pops the leftmost char and steps down to the next smaller variant.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Yield the FIRST char and keep the remaining two in
                    // order; yielding `three` here would rotate the
                    // sequence to three, one, two.
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact number of chars still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    /// Builds a one-char sequence from a 1-tuple.
    impl From<(char,)> for Fold {
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    /// Builds a two-char sequence from a pair.
    impl From<(char, char)> for Fold {
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    /// Builds a three-char sequence from a triple.
    impl From<(char, char, char)> for Fold {
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
159
#[cfg(test)]
mod tests {
    use super::Unicode;

    // Asserts that two strings compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => {{
            assert_eq!(Unicode($left), Unicode($right));
        }};
    }

    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    #[test]
    fn test_full_case_folding() {
        // U+FB02 is the single-char "fl" ligature. It is written as an
        // escape so that editors or normalisation passes cannot silently
        // turn it into plain "fl", which would make this assertion compare
        // a string with itself.
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}
201