lib.rs source code [crates/unicase/src/lib.rs]

1	#![cfg_attr(test, deny(missing_docs))]
2	#![cfg_attr(test, deny(warnings))]
3	#![cfg_attr(feature = "nightly", feature(test))]
4	#![no_std]
5
6	//! # UniCase
7	//!
8	//! UniCase provides a way of specifying strings that are case-insensitive.
9	//!
10	//! UniCase supports full [Unicode case
11	//! folding](https://www.w3.org/International/wiki/Case_folding). It can also
12	//! utilize faster ASCII case comparisons, if both strings are ASCII.
13	//!
14	//! Using the `UniCase::new()` constructor will check the string to see if it
15	//! is all ASCII. When a `UniCase` is compared against another, if both are
16	//! ASCII, it will use the faster comparison.
17	//!
//! This crate also provides the `Ascii` type, which always uses ASCII case
//! comparisons. Use it when the text is already known to be ASCII.
20	//!
21	//! ## Example
22	//!
23	//! ```rust
24	//! use unicase::UniCase;
25	//!
26	//! let a = UniCase::new("Maße");
27	//! let b = UniCase::new("MASSE");
28	//! let c = UniCase::new("mase");
29	//!
30	//! assert_eq!(a, b);
31	//! assert!(b != c);
32	//! ```
33	//!
34	//! ## Ascii
35	//!
36	//! ```rust
37	//! use unicase::Ascii;
38	//!
39	//! let a = Ascii::new("foobar");
40	//! let b = Ascii::new("FoObAr");
41	//!
42	//! assert_eq!(a, b);
43	//! ```
44
45	#[cfg(test)]
46	extern crate std;
47	#[cfg(feature = "nightly")]
48	extern crate test;
49
50	extern crate alloc;
51	use alloc::string::String;
52
53	use alloc::borrow::Cow;
54	use core::cmp::Ordering;
55	use core::fmt;
56	use core::hash::{Hash, Hasher};
57	use core::ops::{Deref, DerefMut};
58	use core::str::FromStr;
59
60	use self::unicode::Unicode;
61
62	mod ascii;
63	mod unicode;
64
65	/// Case Insensitive wrapper of strings.
66	#[derive(Clone, Copy)]
67	pub struct UniCase<S>(Encoding<S>);
68
/// A case-insensitive wrapper for strings that are treated as ASCII.
#[derive(Debug, Default, Copy, Clone)]
pub struct Ascii<S>(S);
72
73	/// Compare two string-like types for case-less equality, using unicode folding.
74	///
75	/// Equivalent to `UniCase::new(left) == UniCase::new(right)`.
76	///
77	/// Note: This will perform a scan for ASCII characters before doing the
78	/// the comparison. See `UniCase` for more information.
79	#[inline]
80	pub fn eq<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
81	UniCase::new(left) == UniCase::new(right)
82	}
83
84	/// Compare two string-like types for case-less equality, ignoring ASCII case.
85	///
86	/// Equivalent to `Ascii::new(left) == Ascii::new(right)`.
87	#[inline]
88	pub fn eq_ascii<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
89	Ascii(left) == Ascii(right)
90	}
91
92	#[derive(Clone, Copy, Debug)]
93	enum Encoding<S> {
94	Ascii(Ascii<S>),
95	Unicode(Unicode<S>),
96	}
97
// Borrows the wrapped value out of an `Encoding`, whichever variant it is.
//
// `inner!(expr)` yields a shared reference to the wrapped value, and
// `inner!(mut expr)` yields a mutable reference. The `mut` arm must stay
// first: otherwise the leading `mut` token would be handed to the `expr`
// matcher of the other arm.
macro_rules! inner {
    (mut $e:expr) => {{
        match &mut $e {
            &mut Encoding::Ascii(ref mut s) => &mut s.0,
            &mut Encoding::Unicode(ref mut s) => &mut s.0,
        }
    }};
    ($e:expr) => {{
        match &$e {
            &Encoding::Ascii(ref s) => &s.0,
            &Encoding::Unicode(ref s) => &s.0,
        }
    }};
}
112
113	impl<S: AsRef<str> + Default> Default for UniCase<S> {
114	fn default() -> Self {
115	Self::new(Default::default())
116	}
117	}
118
119	impl<S: AsRef<str>> UniCase<S> {
120	/// Creates a new `UniCase`.
121	///
122	/// Note: This scans the text to determine if it is all ASCII or not.
123	pub fn new(s: S) -> UniCase<S> {
124	if s.as_ref().is_ascii() {
125	UniCase(Encoding::Ascii(Ascii(s)))
126	} else {
127	UniCase(Encoding::Unicode(Unicode(s)))
128	}
129	}
130
131	/// Returns a copy of this string where each character is mapped to its
132	/// Unicode CaseFolding equivalent.
133	///
134	/// # Note
135	///
136	/// Unicode Case Folding is meant for string storage and matching, not for
137	/// display.
138	pub fn to_folded_case(&self) -> String {
139	match self.0 {
140	Encoding::Ascii(ref s) => s.0.as_ref().to_ascii_lowercase(),
141	Encoding::Unicode(ref s) => s.to_folded_case(),
142	}
143	}
144	}
145
146	impl<S> UniCase<S> {
147	/// Creates a new `UniCase`, skipping the ASCII check.
148	pub const fn unicode(s: S) -> UniCase<S> {
149	UniCase(Encoding::Unicode(Unicode(s)))
150	}
151
152	/// Creates a new `UniCase` which performs only ASCII case folding.
153	pub const fn ascii(s: S) -> UniCase<S> {
154	UniCase(Encoding::Ascii(Ascii(s)))
155	}
156
157	/// Return `true` if this instance will only perform ASCII case folding.
158	pub fn is_ascii(&self) -> bool {
159	match self.0 {
160	Encoding::Ascii(_) => `true`,
161	Encoding::Unicode(_) => `false`,
162	}
163	}
164
165	/// Unwraps the inner value held by this `UniCase`.
166	#[inline]
167	pub fn into_inner(self) -> S {
168	match self.0 {
169	Encoding::Ascii(s) => s.0,
170	Encoding::Unicode(s) => s.0,
171	}
172	}
173	}
174
175	impl<S> Deref for UniCase<S> {
176	type Target = S;
177	#[inline]
178	fn deref<'a>(&'a self) -> &'a S {
179	inner!(self.0)
180	}
181	}
182
183	impl<S> DerefMut for UniCase<S> {
184	#[inline]
185	fn deref_mut<'a>(&'a mut self) -> &'a mut S {
186	inner!(mut self.0)
187	}
188	}
189
190	impl<S: AsRef<str>> AsRef<str> for UniCase<S> {
191	#[inline]
192	fn as_ref(&self) -> &str {
193	inner!(self.0).as_ref()
194	}
195	}
196
197	impl<S: fmt::Debug> fmt::Debug for UniCase<S> {
198	#[inline]
199	fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
200	fmt::Debug::fmt(self:inner!(self.0), f:fmt)
201	}
202	}
203
204	impl<S: fmt::Display> fmt::Display for UniCase<S> {
205	#[inline]
206	fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
207	fmt::Display::fmt(self:inner!(self.0), f:fmt)
208	}
209	}
210
211	impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<UniCase<S2>> for UniCase<S1> {
212	#[inline]
213	fn eq(&self, other: &UniCase<S2>) -> bool {
214	match (&self.0, &other.0) {
215	(&Encoding::Ascii(ref x: &Ascii), &Encoding::Ascii(ref y: &Ascii)) => x == y,
216	(&Encoding::Unicode(ref x: &Unicode), &Encoding::Unicode(ref y: &Unicode)) => x == y,
217	(&Encoding::Ascii(ref x: &Ascii), &Encoding::Unicode(ref y: &Unicode)) => &Unicode(x.as_ref()) == y,
218	(&Encoding::Unicode(ref x: &Unicode), &Encoding::Ascii(ref y: &Ascii)) => x == &Unicode(y.as_ref()),
219	}
220	}
221	}
222
223	impl<S: AsRef<str>> Eq for UniCase<S> {}
224
225	impl<S: AsRef<str>> Hash for UniCase<S> {
226	#[inline]
227	fn hash<H: Hasher>(&self, hasher: &mut H) {
228	match self.0 {
229	Encoding::Ascii(ref s: &Ascii~~) => s.hash(state:hasher),~~
230	Encoding::Unicode(ref s: &Unicode~~) => s.hash(state:hasher),~~
231	}
232	}
233	}
234
235	impl<S> From<Ascii<S>> for UniCase<S> {
236	fn from(ascii: Ascii<S>) -> Self {
237	UniCase(Encoding::Ascii(ascii))
238	}
239	}
240
// Generates `impl From<$from> for UniCase<$to>`.
//
// The source value is converted by calling the method named `$by` on it
// (`into` when `$by` is omitted), and the result is wrapped with
// `UniCase::unicode`. The lifetime `'a` is available to both types.
macro_rules! from_impl {
    ($from:ty => $to:ty; $by:ident) => {
        impl<'a> From<$from> for UniCase<$to> {
            fn from(s: $from) -> Self {
                UniCase::unicode(s.$by())
            }
        }
    };
    ($from:ty => $to:ty) => {
        from_impl!($from => $to; into);
    };
}
251
// Generates `impl Into<$to> for UniCase<$to>`, which unwraps the value with
// `into_inner`. The lifetime `'a` is available to `$to`.
macro_rules! into_impl {
    ($to:ty) => (
        impl<'a> Into<$to> for UniCase<$to> {
            fn into(self) -> $to {
                self.into_inner()
            }
        }
    );
}
261
262	impl<S: AsRef<str>> From<S> for UniCase<S> {
263	fn from(s: S) -> Self {
264	UniCase::unicode(s)
265	}
266	}
267
268	from_impl!(&'a str => Cow<'a, str>);
269	from_impl!(String => Cow<'a, str>);
270	from_impl!(&'a str => String);
271	from_impl!(Cow<'a, str> => String; into_owned);
272	from_impl!(&'a String => &'a str; as_ref);
273
274	into_impl!(&'a str);
275	into_impl!(String);
276	into_impl!(Cow<'a, str>);
277
278	impl<T: AsRef<str>> PartialOrd for UniCase<T> {
279	#[inline]
280	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
281	Some(self.cmp(other))
282	}
283	}
284
285	impl<T: AsRef<str>> Ord for UniCase<T> {
286	#[inline]
287	fn cmp(&self, other: &Self) -> Ordering {
288	match (&self.0, &other.0) {
289	(&Encoding::Ascii(ref x: &Ascii), &Encoding::Ascii(ref y: &Ascii)) => x.cmp(y),
290	(&Encoding::Unicode(ref x: &Unicode), &Encoding::Unicode(ref y: &Unicode)) => x.cmp(y),
291	(&Encoding::Ascii(ref x: &Ascii), &Encoding::Unicode(ref y: &Unicode)) => {
292	Unicode(x.as_ref()).cmp(&Unicode(y.0.as_ref()))
293	}
294	(&Encoding::Unicode(ref x: &Unicode), &Encoding::Ascii(ref y: &Ascii)) => {
295	Unicode(x.0.as_ref()).cmp(&Unicode(y.as_ref()))
296	}
297	}
298	}
299	}
300
301	impl<S: FromStr + AsRef<str>> FromStr for UniCase<S> {
302	type Err = <S as FromStr>::Err;
303	fn from_str(s: &str) -> Result<UniCase<S>, Self::Err> {
304	s.parse().map(op:UniCase::new)
305	}
306	}
307
308	#[cfg(test)]
309	mod tests {
310	use super::UniCase;
311	use std::borrow::ToOwned;
312	use std::collections::hash_map::DefaultHasher;
313	use std::hash::{Hash, Hasher};
314	use std::string::String;
315
// Hashes `t` with a fresh `DefaultHasher` and returns the resulting digest.
fn hash<T: Hash>(t: &T) -> u64 {
    let mut hasher = DefaultHasher::new();
    Hash::hash(t, &mut hasher);
    hasher.finish()
}
321
// A `UniCase` wrapping a reference is `Copy`, so it can be passed by value
// more than once.
#[test]
fn test_copy_for_refs() {
    fn consume<T>(_: UniCase<T>) {}

    let wrapped = UniCase::new("foobar");
    consume(wrapped);
    consume(wrapped);
}
330
// ASCII strings that differ only in case compare equal, hash equally, and
// are tagged as ASCII.
#[test]
fn test_eq_ascii() {
    let lower = UniCase::new("foobar");
    let upper = UniCase::new("FOOBAR");
    let mixed = UniCase::ascii("FoObAr");

    // Equality holds in both directions.
    assert_eq!(lower, upper);
    assert_eq!(upper, lower);
    assert_eq!(lower, mixed);
    assert_eq!(mixed, lower);

    // Equal values produce equal hashes.
    assert_eq!(hash(&lower), hash(&upper));
    assert_eq!(hash(&lower), hash(&mixed));

    assert!(lower.is_ascii());
    assert!(upper.is_ascii());
    assert!(mixed.is_ascii());
}
347
// The two spellings differ only in the form of the final sigma; they must
// compare equal and hash equally.
#[test]
fn test_eq_unicode() {
    let final_sigma = UniCase::new("στιγμας");
    let plain_sigma = UniCase::new("στιγμασ");

    assert_eq!(final_sigma, plain_sigma);
    assert_eq!(plain_sigma, final_sigma);
    assert_eq!(hash(&final_sigma), hash(&plain_sigma));
}
356
// A string must not compare equal to a longer string that it is a prefix of.
#[test]
fn test_eq_unicode_left_is_substring() {
    // https://github.com/seanmonstar/unicase/issues/38
    let prefix = UniCase::unicode("foo");
    let longer = UniCase::unicode("foobar");

    assert!(prefix != longer);
    assert!(longer != prefix);
}
366
// Benchmarks equality of two short ASCII-tagged `UniCase` values.
#[cfg(feature = "nightly")]
#[bench]
fn bench_unicase_ascii(b: &mut ::test::Bencher) {
    b.bytes = b"foobar".len() as u64;
    let x = UniCase::new("foobar");
    let y = UniCase::new("FOOBAR");
    b.iter(|| assert_eq!(x, y));
}
375
376	#[cfg(feature = "nightly")]
377	static SUBJECT: &'static [u8] = b"ffoo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz oo bar baz quux herp derp";
378
// Benchmark helper: reports whether every byte in `bytes` is ASCII. Marked
// `inline(never)` so the call is not inlined into the benchmark closure.
#[cfg(feature = "nightly")]
#[inline(never)]
fn is_ascii(bytes: &[u8]) -> bool {
    #[allow(unused, deprecated)]
    use std::ascii::AsciiExt;
    let all_ascii = bytes.is_ascii();
    all_ascii
}
386
// Benchmarks the ASCII scan over `SUBJECT`.
#[cfg(feature = "nightly")]
#[bench]
fn bench_is_ascii(b: &mut ::test::Bencher) {
    b.iter(|| assert!(is_ascii(SUBJECT)));
}
392
// Benchmarks UTF-8 validation over `SUBJECT`, as a comparison point for the
// ASCII scan benchmark.
#[cfg(feature = "nightly")]
#[bench]
fn bench_is_utf8(b: &mut ::test::Bencher) {
    b.iter(|| assert!(::std::str::from_utf8(SUBJECT).is_ok()));
}
398
// Ordering ignores case, and a string sorts before any longer string it is
// a prefix of.
#[test]
fn test_case_cmp() {
    let lower_a = UniCase::new("a");
    let upper_a = UniCase::new("A");
    let lower_b = UniCase::new("b");
    let upper_b = UniCase::new("B");
    let lower_aa = UniCase::new("aa");
    let upper_aa = UniCase::new("AA");

    assert!(lower_a < upper_b);

    assert!(upper_a < lower_b);
    assert!(lower_aa > lower_a);

    assert!(lower_a < lower_aa);
    assert!(lower_a < upper_aa);
}
409
// Compile-time check that the `From` conversions into `UniCase` exist for
// borrowed and owned strings.
#[test]
fn test_from_impls() {
    let borrowed: &'static str = "foobar";
    let _: UniCase<&'static str> = borrowed.into();
    let _: UniCase<&str> = borrowed.into();
    let _: UniCase<String> = borrowed.into();

    let text: String = borrowed.to_owned();
    let _: UniCase<&str> = (&text).into();
    let _: UniCase<String> = text.into();
}
421
// Compile-time check that a `UniCase` converts back into its wrapped string
// type.
#[test]
fn test_into_impls() {
    let borrowed: UniCase<&'static str> = UniCase::new("foobar");
    let _: &'static str = borrowed.into();
    let _: &str = borrowed.into();

    let text: UniCase<String> = "foobar".into();
    let _: String = text.clone().into();
    let _: &str = text.as_ref();
}
432
// `UniCase::unicode` must be usable in a `const` context.
#[test]
fn test_unicase_unicode_const() {
    const _EMPTY: UniCase<&'static str> = UniCase::unicode("");
}
437	}
438