lib.rs source code [crates/bytecount/src/lib.rs]

1	//! count occurrences of a given byte, or the number of UTF-8 code points, in a
2	//! byte slice, fast.
3	//!
4	//! This crate has the [`count`](fn.count.html) method to count byte
5	//! occurrences (for example newlines) in a larger `&[u8]` slice.
6	//!
7	//! For example:
8	//!
9	//! ```rust
10	//! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!", b' '));
11	//! ```
12	//!
13	//! Also there is a [`num_chars`](fn.num_chars.html) method to count
14	//! the number of UTF-8 characters in a slice. It will work the same as
15	//! `str::chars().count()` for byte slices of correct UTF-8 character
16	//! sequences. The result will likely be off for invalid sequences,
17	//! although the result is guaranteed to be between `0` and
18	//! `[_]::len()`, inclusive.
19	//!
20	//! Example:
21	//!
22	//! ```rust
23	//! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!";
24	//! assert_eq!(sequence.chars().count(),
25	//! bytecount::num_chars(sequence.as_bytes()));
26	//! ```
27	//!
28	//! For completeness and easy comparison, the "naive" versions of both
29	//! count and num_chars are provided. Those are also faster if used on
30	//! predominantly small strings. The
31	//! [`naive_count_32`](fn.naive_count_32.html) method can be faster
32	//! still on small strings.
33
34	#![deny(missing_docs)]
35	#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]
36
37	#[cfg(not(feature = "runtime-dispatch-simd"))]
38	use core::mem;
39	#[cfg(feature = "runtime-dispatch-simd")]
40	use std::mem;
41
42	mod naive;
43	pub use naive::*;
44	mod integer_simd;
45
46	#[cfg(any(
47	all(
48	feature = "runtime-dispatch-simd",
49	any(target_arch = "x86", target_arch = "x86_64")
50	),
51	target_arch = "aarch64",
52	feature = "generic-simd"
53	))]
54	mod simd;
55
56	/// Count occurrences of a byte in a slice of bytes, fast
57	///
58	/// # Examples
59	///
60	/// ```
61	/// let s = b"This is a Text with spaces";
62	/// let number_of_spaces = bytecount::count(s, b' ');
63	/// assert_eq!(number_of_spaces, `5`);
64	/// ```
65	pub fn count(haystack: &[u8], needle: u8) -> usize {
66	if haystack.len() >= `32` {
67	#[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
68	{
69	if is_x86_feature_detected!("avx2") {
70	unsafe {
71	return simd::x86_avx2::chunk_count(haystack, needle);
72	}
73	}
74	}
75
76	#[cfg(feature = "generic-simd")]
77	return simd::generic::chunk_count(haystack, needle);
78	}
79
80	if haystack.len() >= `16` {
81	#[cfg(all(
82	feature = "runtime-dispatch-simd",
83	any(target_arch = "x86", target_arch = "x86_64"),
84	not(feature = "generic-simd")
85	))]
86	{
87	if is_x86_feature_detected!("sse2") {
88	unsafe {
89	return simd::x86_sse2::chunk_count(haystack, needle);
90	}
91	}
92	}
93	#[cfg(all(target_arch = "aarch64", not(feature = "generic_simd")))]
94	{
95	unsafe {
96	return simd::aarch64::chunk_count(haystack, needle);
97	}
98	}
99	}
100
101	if haystack.len() >= mem::size_of::<usize>() {
102	return integer_simd::chunk_count(haystack, needle);
103	}
104
105	naive_count(haystack, needle)
106	}
107
108	/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast
109	///
110	/// This function is safe to use on any byte array, valid UTF-8 or not,
111	/// but the output is only meaningful for well-formed UTF-8.
112	///
113	/// # Example
114	///
115	/// ```
116	/// let swordfish = "メカジキ";
117	/// let char_count = bytecount::num_chars(swordfish.as_bytes());
118	/// assert_eq!(char_count, `4`);
119	/// ```
120	pub fn num_chars(utf8_chars: &[u8]) -> usize {
121	if utf8_chars.len() >= `32` {
122	#[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
123	{
124	if is_x86_feature_detected!("avx2") {
125	unsafe {
126	return simd::x86_avx2::chunk_num_chars(utf8_chars);
127	}
128	}
129	}
130
131	#[cfg(feature = "generic-simd")]
132	return simd::generic::chunk_num_chars(utf8_chars);
133	}
134
135	if utf8_chars.len() >= `16` {
136	#[cfg(all(
137	feature = "runtime-dispatch-simd",
138	any(target_arch = "x86", target_arch = "x86_64"),
139	not(feature = "generic-simd")
140	))]
141	{
142	if is_x86_feature_detected!("sse2") {
143	unsafe {
144	return simd::x86_sse2::chunk_num_chars(utf8_chars);
145	}
146	}
147	}
148	#[cfg(all(target_arch = "aarch64", not(feature = "generic_simd")))]
149	{
150	unsafe {
151	return simd::aarch64::chunk_num_chars(utf8_chars);
152	}
153	}
154	}
155
156	if utf8_chars.len() >= mem::size_of::<usize>() {
157	return integer_simd::chunk_num_chars(utf8_chars);
158	}
159
160	naive_num_chars(utf8_chars)
161	}
162