1//! Count occurrences of a given byte, or the number of UTF-8 code points, in a
2//! byte slice, fast.
3//!
4//! This crate has the [`count`](fn.count.html) method to count byte
5//! occurrences (for example newlines) in a larger `&[u8]` slice.
6//!
7//! For example:
8//!
9//! ```rust
10//! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!", b' '));
11//! ```
12//!
13//! There is also a [`num_chars`](fn.num_chars.html) function to count
14//! the number of UTF-8 characters in a slice. It will work the same as
15//! `str::chars().count()` for byte slices of correct UTF-8 character
16//! sequences. The result will likely be off for invalid sequences,
17//! although the result is guaranteed to be between `0` and
18//! `[_]::len()`, inclusive.
19//!
20//! Example:
21//!
22//! ```rust
23//! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!";
24//! assert_eq!(sequence.chars().count(),
25//! bytecount::num_chars(sequence.as_bytes()));
26//! ```
27//!
28//! For completeness and easy comparison, the "naive" versions of both
29//! count and num_chars are provided. Those are also faster if used on
30//! predominantly small strings. The
31//! [`naive_count_32`](fn.naive_count_32.html) method can be faster
32//! still on small strings.
33
34#![cfg_attr(feature = "generic-simd", feature(portable_simd))]
35
36#![deny(missing_docs)]
37#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]
38
39#[cfg(not(feature = "runtime-dispatch-simd"))]
40use core::mem;
41#[cfg(feature = "runtime-dispatch-simd")]
42use std::mem;
43
44mod naive;
45pub use naive::*;
46mod integer_simd;
47
48#[cfg(any(
49 all(
50 feature = "runtime-dispatch-simd",
51 any(target_arch = "x86", target_arch = "x86_64")
52 ),
53 target_arch = "aarch64",
54 target_arch = "wasm32",
55 feature = "generic-simd"
56))]
57mod simd;
58
59/// Count occurrences of a byte in a slice of bytes, fast
60///
61/// # Examples
62///
63/// ```
64/// let s = b"This is a Text with spaces";
65/// let number_of_spaces = bytecount::count(s, b' ');
66/// assert_eq!(number_of_spaces, 5);
67/// ```
68pub fn count(haystack: &[u8], needle: u8) -> usize {
69 if haystack.len() >= 32 {
70 #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
71 {
72 if is_x86_feature_detected!("avx2") {
73 unsafe {
74 return simd::x86_avx2::chunk_count(haystack, needle);
75 }
76 }
77 }
78
79 #[cfg(feature = "generic-simd")]
80 return simd::generic::chunk_count(haystack, needle);
81 }
82
83 if haystack.len() >= 16 {
84 #[cfg(all(
85 feature = "runtime-dispatch-simd",
86 any(target_arch = "x86", target_arch = "x86_64"),
87 not(feature = "generic-simd")
88 ))]
89 {
90 if is_x86_feature_detected!("sse2") {
91 unsafe {
92 return simd::x86_sse2::chunk_count(haystack, needle);
93 }
94 }
95 }
96 #[cfg(all(target_arch = "aarch64", not(feature = "generic_simd")))]
97 {
98 unsafe {
99 return simd::aarch64::chunk_count(haystack, needle);
100 }
101 }
102
103 #[cfg(target_arch = "wasm32")]
104 {
105 unsafe {
106 return simd::wasm::chunk_count(haystack, needle);
107 }
108 }
109 }
110
111 if haystack.len() >= mem::size_of::<usize>() {
112 return integer_simd::chunk_count(haystack, needle);
113 }
114
115 naive_count(haystack, needle)
116}
117
118/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast
119///
120/// This function is safe to use on any byte array, valid UTF-8 or not,
121/// but the output is only meaningful for well-formed UTF-8.
122///
123/// # Example
124///
125/// ```
126/// let swordfish = "メカジキ";
127/// let char_count = bytecount::num_chars(swordfish.as_bytes());
128/// assert_eq!(char_count, 4);
129/// ```
130pub fn num_chars(utf8_chars: &[u8]) -> usize {
131 if utf8_chars.len() >= 32 {
132 #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
133 {
134 if is_x86_feature_detected!("avx2") {
135 unsafe {
136 return simd::x86_avx2::chunk_num_chars(utf8_chars);
137 }
138 }
139 }
140
141 #[cfg(feature = "generic-simd")]
142 return simd::generic::chunk_num_chars(utf8_chars);
143 }
144
145 if utf8_chars.len() >= 16 {
146 #[cfg(all(
147 feature = "runtime-dispatch-simd",
148 any(target_arch = "x86", target_arch = "x86_64"),
149 not(feature = "generic-simd")
150 ))]
151 {
152 if is_x86_feature_detected!("sse2") {
153 unsafe {
154 return simd::x86_sse2::chunk_num_chars(utf8_chars);
155 }
156 }
157 }
158 #[cfg(all(target_arch = "aarch64", not(feature = "generic_simd")))]
159 {
160 unsafe {
161 return simd::aarch64::chunk_num_chars(utf8_chars);
162 }
163 }
164
165 #[cfg(target_arch = "wasm32")]
166 {
167 unsafe {
168 return simd::wasm::chunk_num_chars(utf8_chars);
169 }
170 }
171 }
172
173 if utf8_chars.len() >= mem::size_of::<usize>() {
174 return integer_simd::chunk_num_chars(utf8_chars);
175 }
176
177 naive_num_chars(utf8_chars)
178}
179

Provided by KDAB

Privacy Policy
Learn Rust with the experts
Find out more