//! Count occurrences of a given byte, or the number of UTF-8 code points, in a
//! byte slice, fast.
3//!
4//! This crate has the [`count`](fn.count.html) method to count byte
5//! occurrences (for example newlines) in a larger `&[u8]` slice.
6//!
7//! For example:
8//!
9//! ```rust
10//! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!", b' '));
11//! ```
12//!
//! Also there is a [`num_chars`](fn.num_chars.html) function to count
//! the number of UTF-8 characters in a slice. It will work the same as
//! `str::chars().count()` for byte slices of correct UTF-8 character
//! sequences. The result will likely be off for invalid sequences,
//! although the result is guaranteed to be between `0` and
//! `[_]::len()`, inclusive.
19//!
20//! Example:
21//!
22//! ```rust
23//! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!";
24//! assert_eq!(sequence.chars().count(),
25//! bytecount::num_chars(sequence.as_bytes()));
26//! ```
27//!
28//! For completeness and easy comparison, the "naive" versions of both
29//! count and num_chars are provided. Those are also faster if used on
30//! predominantly small strings. The
31//! [`naive_count_32`](fn.naive_count_32.html) method can be faster
32//! still on small strings.
33
// Every public item must carry documentation; a missing doc comment is a
// hard compile error rather than a warning.
#![deny(missing_docs)]
// The crate is `no_std` unless `runtime-dispatch-simd` is enabled: that
// feature uses `is_x86_feature_detected!`, which is only provided by `std`.
#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]

// `mem` (needed for `mem::size_of`) is taken from `core` in the `no_std`
// configuration and from `std` otherwise.
#[cfg(not(feature = "runtime-dispatch-simd"))]
use core::mem;
#[cfg(feature = "runtime-dispatch-simd")]
use std::mem;

// Straightforward reference implementations; everything public in `naive`
// is re-exported from the crate root.
mod naive;
pub use naive::*;
// Fallback used by `count` / `num_chars` for inputs of at least
// `size_of::<usize>()` bytes when no SIMD backend handled the input.
mod integer_simd;

// SIMD backends. The module is only compiled when at least one backend can
// be selected: runtime dispatch on x86/x86_64, any aarch64 target, or the
// `generic-simd` feature.
#[cfg(any(
    all(
        feature = "runtime-dispatch-simd",
        any(target_arch = "x86", target_arch = "x86_64")
    ),
    target_arch = "aarch64",
    feature = "generic-simd"
))]
mod simd;
55
56/// Count occurrences of a byte in a slice of bytes, fast
57///
58/// # Examples
59///
60/// ```
61/// let s = b"This is a Text with spaces";
62/// let number_of_spaces = bytecount::count(s, b' ');
63/// assert_eq!(number_of_spaces, 5);
64/// ```
65pub fn count(haystack: &[u8], needle: u8) -> usize {
66 if haystack.len() >= 32 {
67 #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
68 {
69 if is_x86_feature_detected!("avx2") {
70 unsafe {
71 return simd::x86_avx2::chunk_count(haystack, needle);
72 }
73 }
74 }
75
76 #[cfg(feature = "generic-simd")]
77 return simd::generic::chunk_count(haystack, needle);
78 }
79
80 if haystack.len() >= 16 {
81 #[cfg(all(
82 feature = "runtime-dispatch-simd",
83 any(target_arch = "x86", target_arch = "x86_64"),
84 not(feature = "generic-simd")
85 ))]
86 {
87 if is_x86_feature_detected!("sse2") {
88 unsafe {
89 return simd::x86_sse2::chunk_count(haystack, needle);
90 }
91 }
92 }
93 #[cfg(all(target_arch = "aarch64", not(feature = "generic_simd")))]
94 {
95 unsafe {
96 return simd::aarch64::chunk_count(haystack, needle);
97 }
98 }
99 }
100
101 if haystack.len() >= mem::size_of::<usize>() {
102 return integer_simd::chunk_count(haystack, needle);
103 }
104
105 naive_count(haystack, needle)
106}
107
108/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast
109///
110/// This function is safe to use on any byte array, valid UTF-8 or not,
111/// but the output is only meaningful for well-formed UTF-8.
112///
113/// # Example
114///
115/// ```
116/// let swordfish = "メカジキ";
117/// let char_count = bytecount::num_chars(swordfish.as_bytes());
118/// assert_eq!(char_count, 4);
119/// ```
120pub fn num_chars(utf8_chars: &[u8]) -> usize {
121 if utf8_chars.len() >= 32 {
122 #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
123 {
124 if is_x86_feature_detected!("avx2") {
125 unsafe {
126 return simd::x86_avx2::chunk_num_chars(utf8_chars);
127 }
128 }
129 }
130
131 #[cfg(feature = "generic-simd")]
132 return simd::generic::chunk_num_chars(utf8_chars);
133 }
134
135 if utf8_chars.len() >= 16 {
136 #[cfg(all(
137 feature = "runtime-dispatch-simd",
138 any(target_arch = "x86", target_arch = "x86_64"),
139 not(feature = "generic-simd")
140 ))]
141 {
142 if is_x86_feature_detected!("sse2") {
143 unsafe {
144 return simd::x86_sse2::chunk_num_chars(utf8_chars);
145 }
146 }
147 }
148 #[cfg(all(target_arch = "aarch64", not(feature = "generic_simd")))]
149 {
150 unsafe {
151 return simd::aarch64::chunk_num_chars(utf8_chars);
152 }
153 }
154 }
155
156 if utf8_chars.len() >= mem::size_of::<usize>() {
157 return integer_simd::chunk_num_chars(utf8_chars);
158 }
159
160 naive_num_chars(utf8_chars)
161}
162