| 1 | use alloc::string::String; |
| 2 | use core::cmp::Ordering; |
| 3 | use core::hash::{Hash, Hasher}; |
| 4 | |
| 5 | use self::map::lookup; |
| 6 | mod map; |
| 7 | |
/// Wrapper that gives a string-like value case-insensitive semantics based on
/// Unicode case folding.
///
/// The wrapped value is stored untouched in the public tuple field. The
/// comparison and hashing impls in this module never modify it; they fold each
/// character on the fly through `lookup`.
#[derive(Debug, Default, Copy, Clone)]
pub struct Unicode<S>(pub S);
| 10 | |
| 11 | impl<S: AsRef<str>> Unicode<S> { |
| 12 | pub fn to_folded_case(&self) -> String { |
| 13 | self.0.as_ref().chars().flat_map(lookup).collect() |
| 14 | } |
| 15 | } |
| 16 | |
| 17 | impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> { |
| 18 | #[inline ] |
| 19 | fn eq(&self, other: &Unicode<S2>) -> bool { |
| 20 | let mut left: impl Iterator = self.0.as_ref().chars().flat_map(lookup); |
| 21 | let mut right: impl Iterator = other.0.as_ref().chars().flat_map(lookup); |
| 22 | |
| 23 | // inline Iterator::eq since not added until Rust 1.5 |
| 24 | loop { |
| 25 | let x: char = match left.next() { |
| 26 | None => return right.next().is_none(), |
| 27 | Some(val: char) => val, |
| 28 | }; |
| 29 | |
| 30 | let y: char = match right.next() { |
| 31 | None => return false, |
| 32 | Some(val: char) => val, |
| 33 | }; |
| 34 | |
| 35 | if x != y { |
| 36 | return false; |
| 37 | } |
| 38 | } |
| 39 | } |
| 40 | } |
| 41 | |
| 42 | impl<S: AsRef<str>> Eq for Unicode<S> {} |
| 43 | |
| 44 | impl<T: AsRef<str>> PartialOrd for Unicode<T> { |
| 45 | #[inline ] |
| 46 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
| 47 | Some(self.cmp(other)) |
| 48 | } |
| 49 | } |
| 50 | |
| 51 | impl<T: AsRef<str>> Ord for Unicode<T> { |
| 52 | #[inline ] |
| 53 | fn cmp(&self, other: &Self) -> Ordering { |
| 54 | let self_chars: impl Iterator = self.0.as_ref().chars().flat_map(lookup); |
| 55 | let other_chars: impl Iterator = other.0.as_ref().chars().flat_map(lookup); |
| 56 | self_chars.cmp(other_chars) |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | impl<S: AsRef<str>> Hash for Unicode<S> { |
| 61 | #[inline ] |
| 62 | fn hash<H: Hasher>(&self, hasher: &mut H) { |
| 63 | let mut buf: [u8; 4] = [0; 4]; |
| 64 | for c: char in self.0.as_ref().chars().flat_map(|c: char| lookup(orig:c)) { |
| 65 | let len: usize = char_to_utf8(c, &mut buf); |
| 66 | // we can't use `write(buf)` because the ASCII variant uses |
| 67 | // `write_u8`. The docs for Hash say that's technically different. |
| 68 | // ¯\_(ツ)_/¯ |
| 69 | for &b: u8 in &buf[..len] { |
| 70 | hasher.write_u8(b); |
| 71 | } |
| 72 | } |
| 73 | // prefix-freedom |
| 74 | hasher.write_u8(0xFF); |
| 75 | } |
| 76 | } |
| 77 | |
/// Encodes `c` as UTF-8 into the front of `dst` and returns the number of
/// bytes written (1 to 4).
///
/// Only `dst[..len]` is overwritten; any remaining bytes keep their previous
/// contents. A 4-byte buffer is large enough for every `char`, so the
/// standard-library encoder cannot panic here.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    c.encode_utf8(dst).len()
}
| 106 | |
// Kept in its own private module so that the enum itself can be declared
// `pub` (a workaround for the privacy checker, per the original author).
mod fold {
    /// The result of folding a single character: zero to three characters,
    /// yielded front to back when iterated.
    ///
    /// The value is its own iterator: each call to `next` returns the first
    /// remaining character and shrinks the value by one variant.
    #[derive(Clone, Copy)]
    pub enum Fold {
        /// Nothing left to yield.
        Zero,
        /// One character left.
        One(char),
        /// Two characters left, in yield order.
        Two(char, char),
        /// Three characters left, in yield order.
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Yields the leading character and keeps the rest for later calls.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Yield the FIRST character and retain the tail in order.
                    // Yielding `three` first would produce `three, one, two`,
                    // so a character with a three-character folding would not
                    // match the same sequence spelled out character by
                    // character.
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact: the number of remaining characters is the variant's arity.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    impl From<(char,)> for Fold {
        /// Builds a one-character fold from a 1-tuple.
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    impl From<(char, char)> for Fold {
        /// Builds a two-character fold; tuple order is yield order.
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    impl From<(char, char, char)> for Fold {
        /// Builds a three-character fold; tuple order is yield order.
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
| 170 | |
#[cfg(test)]
mod tests {
    use super::Unicode;

    /// Asserts that two string-likes compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => {{
            assert_eq!(Unicode($left), Unicode($right));
        }};
    }

    /// Plain ASCII letters differing only in case.
    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    /// One-to-one folding outside ASCII (final sigma vs. regular sigma).
    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    /// Foldings that expand one character into several.
    #[test]
    fn test_full_case_folding() {
        // U+FB02 is the single-character "fl" ligature. It is written as an
        // escape so that text normalisation cannot silently turn it into the
        // two ASCII letters and make this assertion compare a string with
        // itself.
        eq!("\u{fb02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    /// The owned folded form of a string containing an expanding character.
    #[test]
    fn test_to_folded_case() {
        assert_eq!(Unicode("Maße").to_folded_case(), "masse");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}
| 217 | |