1#![cfg_attr(test, deny(missing_docs))]
2#![cfg_attr(test, deny(warnings))]
3#![doc(html_root_url = "https://docs.rs/unicase/2.7.0")]
4#![cfg_attr(feature = "nightly", feature(test))]
5#![cfg_attr(all(__unicase__core_and_alloc, not(test),), no_std)]
6
7//! # UniCase
8//!
9//! UniCase provides a way of specifying strings that are case-insensitive.
10//!
11//! UniCase supports full [Unicode case
12//! folding](https://www.w3.org/International/wiki/Case_folding). It can also
13//! utilize faster ASCII case comparisons, if both strings are ASCII.
14//!
15//! Using the `UniCase::new()` constructor will check the string to see if it
16//! is all ASCII. When a `UniCase` is compared against another, if both are
17//! ASCII, it will use the faster comparison.
18//!
//! The crate also provides the `Ascii` type, which always uses ASCII case
//! comparisons; use it when the text is already known to be ASCII.
21//!
22//! ## Example
23//!
24//! ```rust
25//! use unicase::UniCase;
26//!
27//! let a = UniCase::new("Maße");
28//! let b = UniCase::new("MASSE");
29//! let c = UniCase::new("mase");
30//!
31//! assert_eq!(a, b);
32//! assert!(b != c);
33//! ```
34//!
35//! ## Ascii
36//!
37//! ```rust
38//! use unicase::Ascii;
39//!
40//! let a = Ascii::new("foobar");
41//! let b = Ascii::new("FoObAr");
42//!
43//! assert_eq!(a, b);
44//! ```
45
46#[cfg(feature = "nightly")]
47extern crate test;
48
49#[cfg(all(__unicase__core_and_alloc, not(test)))]
50extern crate alloc;
51#[cfg(all(__unicase__core_and_alloc, not(test)))]
52use alloc::string::String;
53
54#[cfg(not(all(__unicase__core_and_alloc, not(test))))]
55extern crate std as alloc;
56#[cfg(not(all(__unicase__core_and_alloc, not(test))))]
57extern crate std as core;
58
59use alloc::borrow::Cow;
60#[cfg(__unicase__iter_cmp)]
61use core::cmp::Ordering;
62use core::fmt;
63use core::hash::{Hash, Hasher};
64use core::ops::{Deref, DerefMut};
65use core::str::FromStr;
66
67use self::unicode::Unicode;
68
69mod ascii;
70mod unicode;
71
72/// Case Insensitive wrapper of strings.
73#[derive(Clone, Copy)]
74pub struct UniCase<S>(Encoding<S>);
75
/// Case-insensitive wrapper around a string-like value that is treated as
/// ASCII.
#[derive(Debug, Default, Copy, Clone)]
pub struct Ascii<S>(S);
79
80/// Compare two string-like types for case-less equality, using unicode folding.
81///
82/// Equivalent to `UniCase::new(left) == UniCase::new(right)`.
83///
84/// Note: This will perform a scan for ASCII characters before doing the
85/// the comparison. See `UniCase` for more information.
86#[inline]
87pub fn eq<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
88 UniCase::new(left) == UniCase::new(right)
89}
90
91/// Compare two string-like types for case-less equality, ignoring ASCII case.
92///
93/// Equivalent to `Ascii::new(left) == Ascii::new(right)`.
94#[inline]
95pub fn eq_ascii<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
96 Ascii(left) == Ascii(right)
97}
98
99#[derive(Clone, Copy, Debug)]
100enum Encoding<S> {
101 Ascii(Ascii<S>),
102 Unicode(Unicode<S>),
103}
104
// Borrows the value wrapped by either `Encoding` variant.
//
// `inner!(place)` yields a shared reference to the wrapped value;
// `inner!(mut place)` yields a mutable one. The `mut` arm must stay first so
// that the leading `mut` token is not handed to the `expr` matcher.
macro_rules! inner {
    (mut $enc:expr) => {{
        match $enc {
            Encoding::Ascii(ref mut wrapped) => &mut wrapped.0,
            Encoding::Unicode(ref mut wrapped) => &mut wrapped.0,
        }
    }};
    ($enc:expr) => {{
        match $enc {
            Encoding::Ascii(ref wrapped) => &wrapped.0,
            Encoding::Unicode(ref wrapped) => &wrapped.0,
        }
    }};
}
119
120impl<S: AsRef<str> + Default> Default for UniCase<S> {
121 fn default() -> Self {
122 Self::new(Default::default())
123 }
124}
125
126impl<S: AsRef<str>> UniCase<S> {
127 /// Creates a new `UniCase`.
128 ///
129 /// Note: This scans the text to determine if it is all ASCII or not.
130 pub fn new(s: S) -> UniCase<S> {
131 #[cfg(not(__unicase__core_and_alloc))]
132 #[allow(deprecated, unused)]
133 use std::ascii::AsciiExt;
134
135 if s.as_ref().is_ascii() {
136 UniCase(Encoding::Ascii(Ascii(s)))
137 } else {
138 UniCase(Encoding::Unicode(Unicode(s)))
139 }
140 }
141}
142
143impl<S> UniCase<S> {
144 /// Creates a new `UniCase`, skipping the ASCII check.
145 #[cfg(__unicase__const_fns)]
146 pub const fn unicode(s: S) -> UniCase<S> {
147 UniCase(Encoding::Unicode(Unicode(s)))
148 }
149
150 /// Creates a new `UniCase`, skipping the ASCII check.
151 ///
152 /// For Rust versions >= 1.31, this is a `const fn`.
153 #[cfg(not(__unicase__const_fns))]
154 pub fn unicode(s: S) -> UniCase<S> {
155 UniCase(Encoding::Unicode(Unicode(s)))
156 }
157
158 /// Creates a new `UniCase` which performs only ASCII case folding.
159 #[cfg(__unicase__const_fns)]
160 pub const fn ascii(s: S) -> UniCase<S> {
161 UniCase(Encoding::Ascii(Ascii(s)))
162 }
163
164 /// Creates a new `UniCase` which performs only ASCII case folding.
165 ///
166 /// For Rust versions >= 1.31, this is a `const fn`.
167 #[cfg(not(__unicase__const_fns))]
168 pub fn ascii(s: S) -> UniCase<S> {
169 UniCase(Encoding::Ascii(Ascii(s)))
170 }
171
172 /// Return `true` if this instance will only perform ASCII case folding.
173 pub fn is_ascii(&self) -> bool {
174 match self.0 {
175 Encoding::Ascii(_) => true,
176 Encoding::Unicode(_) => false,
177 }
178 }
179
180 /// Unwraps the inner value held by this `UniCase`.
181 #[inline]
182 pub fn into_inner(self) -> S {
183 match self.0 {
184 Encoding::Ascii(s) => s.0,
185 Encoding::Unicode(s) => s.0,
186 }
187 }
188}
189
190impl<S> Deref for UniCase<S> {
191 type Target = S;
192 #[inline]
193 fn deref<'a>(&'a self) -> &'a S {
194 inner!(self.0)
195 }
196}
197
198impl<S> DerefMut for UniCase<S> {
199 #[inline]
200 fn deref_mut<'a>(&'a mut self) -> &'a mut S {
201 inner!(mut self.0)
202 }
203}
204
205impl<S: AsRef<str>> AsRef<str> for UniCase<S> {
206 #[inline]
207 fn as_ref(&self) -> &str {
208 inner!(self.0).as_ref()
209 }
210}
211
212impl<S: fmt::Debug> fmt::Debug for UniCase<S> {
213 #[inline]
214 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
215 fmt::Debug::fmt(self:inner!(self.0), f:fmt)
216 }
217}
218
219impl<S: fmt::Display> fmt::Display for UniCase<S> {
220 #[inline]
221 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
222 fmt::Display::fmt(self:inner!(self.0), f:fmt)
223 }
224}
225
226impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<UniCase<S2>> for UniCase<S1> {
227 #[inline]
228 fn eq(&self, other: &UniCase<S2>) -> bool {
229 match (&self.0, &other.0) {
230 (&Encoding::Ascii(ref x: &Ascii), &Encoding::Ascii(ref y: &Ascii)) => x == y,
231 (&Encoding::Unicode(ref x: &Unicode), &Encoding::Unicode(ref y: &Unicode)) => x == y,
232 (&Encoding::Ascii(ref x: &Ascii), &Encoding::Unicode(ref y: &Unicode)) => &Unicode(x.as_ref()) == y,
233 (&Encoding::Unicode(ref x: &Unicode), &Encoding::Ascii(ref y: &Ascii)) => x == &Unicode(y.as_ref()),
234 }
235 }
236}
237
238impl<S: AsRef<str>> Eq for UniCase<S> {}
239
240impl<S: AsRef<str>> Hash for UniCase<S> {
241 #[inline]
242 fn hash<H: Hasher>(&self, hasher: &mut H) {
243 match self.0 {
244 Encoding::Ascii(ref s: &Ascii) => s.hash(state:hasher),
245 Encoding::Unicode(ref s: &Unicode) => s.hash(state:hasher),
246 }
247 }
248}
249
250impl<S> From<Ascii<S>> for UniCase<S> {
251 fn from(ascii: Ascii<S>) -> Self {
252 UniCase(Encoding::Ascii(ascii))
253 }
254}
255
// Generates `impl From<$src> for UniCase<$dst>`.
//
// The source value is converted by calling `$method` on it, and the result is
// wrapped with `UniCase::unicode`. When no method is named, `into` is used.
// The invocation may mention the lifetime `'a`, which the generated impl
// declares.
macro_rules! from_impl {
    ($src:ty => $dst:ty; $method:ident) => {
        impl<'a> From<$src> for UniCase<$dst> {
            fn from(value: $src) -> UniCase<$dst> {
                UniCase::unicode(value.$method())
            }
        }
    };
    ($src:ty => $dst:ty) => {
        from_impl! { $src => $dst; into }
    };
}
266
// Generates `impl Into<$inner> for UniCase<$inner>`, which unwraps the value
// via `into_inner`. The invocation may mention the lifetime `'a`, which the
// generated impl declares.
macro_rules! into_impl {
    ($inner:ty) => {
        impl<'a> Into<$inner> for UniCase<$inner> {
            fn into(self) -> $inner {
                UniCase::into_inner(self)
            }
        }
    };
}
276
277impl<S: AsRef<str>> From<S> for UniCase<S> {
278 fn from(s: S) -> Self {
279 UniCase::unicode(s)
280 }
281}
282
// Conversions into `UniCase` from a different (but related) string type.
// Each one wraps the converted value with `UniCase::unicode`.
from_impl!(&'a str => Cow<'a, str>);
from_impl!(String => Cow<'a, str>);
from_impl!(&'a str => String);
from_impl!(Cow<'a, str> => String; into_owned);
from_impl!(&'a String => &'a str; as_ref);

// Conversions from `UniCase<T>` back to the wrapped `T`.
into_impl!(&'a str);
into_impl!(String);
into_impl!(Cow<'a, str>);
292
// Ordering is total, so the partial ordering simply wraps `Ord::cmp`.
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> PartialOrd for UniCase<T> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
300
// Total ordering between two `UniCase` values.
//
// When both sides carry the same encoding, the comparison is delegated to that
// encoding's own `Ord`. When the encodings differ, both sides are re-wrapped
// as `Unicode` over borrowed text so that the `Unicode` ordering is used.
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> Ord for UniCase<T> {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        // `ref` bindings cannot carry a type annotation inside a pattern; the
        // previous `ref x: &Ascii` forms were not valid syntax.
        match (&self.0, &other.0) {
            (&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x.cmp(y),
            (&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x.cmp(y),
            (&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => {
                Unicode(x.as_ref()).cmp(&Unicode(y.0.as_ref()))
            }
            (&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => {
                Unicode(x.0.as_ref()).cmp(&Unicode(y.as_ref()))
            }
        }
    }
}
317
318impl<S: FromStr + AsRef<str>> FromStr for UniCase<S> {
319 type Err = <S as FromStr>::Err;
320 fn from_str(s: &str) -> Result<UniCase<S>, Self::Err> {
321 s.parse().map(op:UniCase::new)
322 }
323}
324
325#[cfg(test)]
326mod tests {
327 use super::UniCase;
328 #[cfg(__unicase__default_hasher)]
329 use std::collections::hash_map::DefaultHasher;
330 #[cfg(not(__unicase__default_hasher))]
331 use std::hash::SipHasher as DefaultHasher;
332 use std::hash::{Hash, Hasher};
333
334 fn hash<T: Hash>(t: &T) -> u64 {
335 let mut s = DefaultHasher::new();
336 t.hash(&mut s);
337 s.finish()
338 }
339
340 #[test]
341 fn test_copy_for_refs() {
342 fn foo<T>(_: UniCase<T>) {}
343
344 let a = UniCase::new("foobar");
345 foo(a);
346 foo(a);
347 }
348
349 #[test]
350 fn test_eq_ascii() {
351 let a = UniCase::new("foobar");
352 let b = UniCase::new("FOOBAR");
353 let c = UniCase::ascii("FoObAr");
354
355 assert_eq!(a, b);
356 assert_eq!(b, a);
357 assert_eq!(a, c);
358 assert_eq!(c, a);
359 assert_eq!(hash(&a), hash(&b));
360 assert_eq!(hash(&a), hash(&c));
361 assert!(a.is_ascii());
362 assert!(b.is_ascii());
363 assert!(c.is_ascii());
364 }
365
366 #[test]
367 fn test_eq_unicode() {
368 let a = UniCase::new("στιγμας");
369 let b = UniCase::new("στιγμασ");
370 assert_eq!(a, b);
371 assert_eq!(b, a);
372 assert_eq!(hash(&a), hash(&b));
373 }
374
375 #[test]
376 fn test_eq_unicode_left_is_substring() {
377 // https://github.com/seanmonstar/unicase/issues/38
378 let a = UniCase::unicode("foo");
379 let b = UniCase::unicode("foobar");
380
381 assert!(a != b);
382 assert!(b != a);
383 }
384
385 #[cfg(feature = "nightly")]
386 #[bench]
387 fn bench_unicase_ascii(b: &mut ::test::Bencher) {
388 b.bytes = b"foobar".len() as u64;
389 let x = UniCase::new("foobar");
390 let y = UniCase::new("FOOBAR");
391 b.iter(|| assert_eq!(x, y));
392 }
393
394 #[cfg(feature = "nightly")]
395 static SUBJECT: &'static [u8] = b"ffoo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz oo bar baz quux herp derp";
396
397 #[cfg(feature = "nightly")]
398 #[inline(never)]
399 fn is_ascii(bytes: &[u8]) -> bool {
400 #[allow(unused, deprecated)]
401 use std::ascii::AsciiExt;
402 bytes.is_ascii()
403 }
404
405 #[cfg(feature = "nightly")]
406 #[bench]
407 fn bench_is_ascii(b: &mut ::test::Bencher) {
408 b.iter(|| assert!(is_ascii(SUBJECT)));
409 }
410
411 #[cfg(feature = "nightly")]
412 #[bench]
413 fn bench_is_utf8(b: &mut ::test::Bencher) {
414 b.iter(|| assert!(::std::str::from_utf8(SUBJECT).is_ok()));
415 }
416
417 #[cfg(__unicase__iter_cmp)]
418 #[test]
419 fn test_case_cmp() {
420 assert!(UniCase::new("a") < UniCase::new("B"));
421
422 assert!(UniCase::new("A") < UniCase::new("b"));
423 assert!(UniCase::new("aa") > UniCase::new("a"));
424
425 assert!(UniCase::new("a") < UniCase::new("aa"));
426 assert!(UniCase::new("a") < UniCase::new("AA"));
427 }
428
429 #[test]
430 fn test_from_impls() {
431 let view: &'static str = "foobar";
432 let _: UniCase<&'static str> = view.into();
433 let _: UniCase<&str> = view.into();
434 let _: UniCase<String> = view.into();
435
436 let owned: String = view.to_owned();
437 let _: UniCase<&str> = (&owned).into();
438 let _: UniCase<String> = owned.into();
439 }
440
441 #[test]
442 fn test_into_impls() {
443 let view: UniCase<&'static str> = UniCase::new("foobar");
444 let _: &'static str = view.into();
445 let _: &str = view.into();
446
447 let owned: UniCase<String> = "foobar".into();
448 let _: String = owned.clone().into();
449 let _: &str = owned.as_ref();
450 }
451
452 #[cfg(__unicase__const_fns)]
453 #[test]
454 fn test_unicase_unicode_const() {
455 const _UNICASE: UniCase<&'static str> = UniCase::unicode("");
456 }
457}
458