mod.rs source code [crates/unicase/src/unicode/mod.rs]

1	#[cfg(__unicase__iter_cmp)]
2	use core::cmp::Ordering;
3	use core::hash::{Hash, Hasher};
4
5	use self::map::lookup;
6	mod map;
7
/// Thin wrapper around a string-like value.
///
/// The trait impls in this module compare, order and hash the wrapped text
/// only after every character has been passed through the case-folding
/// `lookup` table. The inner value stays reachable through the public `.0`
/// field.
#[derive(Debug, Default, Clone, Copy)]
pub struct Unicode<S>(pub S);
10
11	impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
12	#[inline]
13	fn eq(&self, other: &Unicode<S2>) -> bool {
14	let mut left: impl Iterator = self.0.as_ref().chars().flat_map(lookup);
15	let mut right: impl Iterator = other.0.as_ref().chars().flat_map(lookup);
16
17	// inline Iterator::eq since not added until Rust 1.5
18	loop {
19	let x: char = match left.next() {
20	None => return right.next().is_none(),
21	Some(val: char) => val,
22	};
23
24	let y: char = match right.next() {
25	None => return `false`,
26	Some(val: char) => val,
27	};
28
29	if x != y {
30	return `false`;
31	}
32	}
33	}
34	}
35
36	impl<S: AsRef<str>> Eq for Unicode<S> {}
37
/// Partial ordering for `Unicode`; always defined, because it simply
/// delegates to the total order given by the `Ord` impl.
#[cfg(__unicase__iter_cmp)]
impl<T> PartialOrd for Unicode<T>
where
    T: AsRef<str>,
{
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
45
/// Total ordering for `Unicode`: the two values are compared
/// lexicographically over the character streams produced by running every
/// character through `lookup`.
#[cfg(__unicase__iter_cmp)]
impl<T> Ord for Unicode<T>
where
    T: AsRef<str>,
{
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let folded_self = self.0.as_ref().chars().flat_map(lookup);
        let folded_other = other.0.as_ref().chars().flat_map(lookup);
        Iterator::cmp(folded_self, folded_other)
    }
}
55
56	impl<S: AsRef<str>> Hash for Unicode<S> {
57	#[inline]
58	fn hash<H: Hasher>(&self, hasher: &mut H) {
59	let mut buf: [u8; 4] = [`0`; `4`];
60	for c: char in self.0.as_ref().chars().flat_map(\|c: char\| lookup(orig:c)) {
61	let len: usize = char_to_utf8(c, &mut buf);
62	hasher.write(&buf[..len])
63	}
64	}
65	}
66
/// Encodes `c` as UTF-8 into the front of `dst`.
///
/// Returns the number of bytes written (1 to 4). Bytes of `dst` past that
/// count are left untouched.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Marker bits for continuation bytes and for the lead byte of
    // two-, three- and four-byte sequences.
    const TAG_CONT: u8 = 0b1000_0000;
    const TAG_TWO_B: u8 = 0b1100_0000;
    const TAG_THREE_B: u8 = 0b1110_0000;
    const TAG_FOUR_B: u8 = 0b1111_0000;

    let scalar = c as u32;

    // One byte: the value is stored as-is.
    if scalar < 0x80 {
        dst[0] = scalar as u8;
        return 1;
    }

    // Two bytes: 5 payload bits in the lead byte, 6 in the continuation.
    if scalar < 0x800 {
        dst[0] = TAG_TWO_B | ((scalar >> 6) & 0x1F) as u8;
        dst[1] = TAG_CONT | (scalar & 0x3F) as u8;
        return 2;
    }

    // Three bytes: 4 payload bits in the lead byte, then 6 + 6.
    if scalar < 0x1_0000 {
        dst[0] = TAG_THREE_B | ((scalar >> 12) & 0x0F) as u8;
        dst[1] = TAG_CONT | ((scalar >> 6) & 0x3F) as u8;
        dst[2] = TAG_CONT | (scalar & 0x3F) as u8;
        return 3;
    }

    // Four bytes: 3 payload bits in the lead byte, then 6 + 6 + 6.
    dst[0] = TAG_FOUR_B | ((scalar >> 18) & 0x07) as u8;
    dst[1] = TAG_CONT | ((scalar >> 12) & 0x3F) as u8;
    dst[2] = TAG_CONT | ((scalar >> 6) & 0x3F) as u8;
    dst[3] = TAG_CONT | (scalar & 0x3F) as u8;
    4
}
95
// Internal module so that the enum itself can be declared `pub`
// (required to satisfy the privacy checker).
mod fold {
    /// The characters one input character folds to: between zero and three
    /// of them, consumed front to back through the `Iterator` impl.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Yields the first remaining character and shrinks `self` to the
        /// variant holding the rest; returns `None` once `Zero` is reached.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Emit the characters in field order, like `Two` does.
                    // Previously this arm returned `three` first and kept
                    // `Two(one, two)`, so a three-character fold came out
                    // rotated and could not match the same characters
                    // written out individually.
                    // NOTE(review): assumes the table in `map` stores the
                    // folded characters in reading order — confirm.
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact number of characters still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    /// A 1-tuple becomes `Fold::One`.
    impl From<(char,)> for Fold {
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    /// A pair becomes `Fold::Two`, keeping the order of its elements.
    impl From<(char, char)> for Fold {
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    /// A triple becomes `Fold::Three`, keeping the order of its elements.
    impl From<(char, char, char)> for Fold {
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
159
#[cfg(test)]
mod tests {
    use super::Unicode;

    /// Wraps both operands in `Unicode` and asserts that they compare equal.
    macro_rules! assert_folded_eq {
        ($lhs:expr, $rhs:expr) => {{
            assert_eq!(Unicode($lhs), Unicode($rhs));
        }};
    }

    /// Plain ASCII letters that differ only in case compare equal.
    #[test]
    fn test_ascii_folding() {
        assert_folded_eq!("foo bar", "FoO BAR");
    }

    /// One-to-one folding outside ASCII.
    #[test]
    fn test_simple_case_folding() {
        assert_folded_eq!("στιγμας", "στιγμασ");
    }

    /// Foldings where a single character expands to several characters.
    #[test]
    fn test_full_case_folding() {
        let pairs = [
            ("ﬂour", "flour"),
            ("Maße", "MASSE"),
            ("ᾲ στο διάολο", "ὰι στο διάολο"),
        ];
        for &(left, right) in pairs.iter() {
            assert_folded_eq!(left, right);
        }
    }

    // The benchmarks below are only compiled with the `nightly` feature.

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| assert_folded_eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| assert_folded_eq!("στιγμας", "στιγμασ"));
    }
}
201