//! Count occurrences of a given byte, or the number of UTF-8 code points, in a
//! byte slice, fast.
3 | //! |
//! This crate has the [`count`](fn.count.html) function to count byte
//! occurrences (for example newlines) in a larger `&[u8]` slice.
6 | //! |
7 | //! For example: |
8 | //! |
9 | //! ```rust |
10 | //! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!" , b' ' )); |
11 | //! ``` |
12 | //! |
//! There is also a [`num_chars`](fn.num_chars.html) function to count
//! the number of UTF-8 characters in a slice. It will work the same as
//! `str::chars().count()` for byte slices of correct UTF-8 character
//! sequences. The result will likely be off for invalid sequences,
//! although the result is guaranteed to be between `0` and
//! `[_]::len()`, inclusive.
19 | //! |
20 | //! Example: |
21 | //! |
22 | //! ```rust |
23 | //! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!" ; |
24 | //! assert_eq!(sequence.chars().count(), |
25 | //! bytecount::num_chars(sequence.as_bytes())); |
26 | //! ``` |
27 | //! |
28 | //! For completeness and easy comparison, the "naive" versions of both |
29 | //! count and num_chars are provided. Those are also faster if used on |
30 | //! predominantly small strings. The |
31 | //! [`naive_count_32`](fn.naive_count_32.html) method can be faster |
32 | //! still on small strings. |
33 | |
// Lint: every public item must be documented, otherwise the build fails.
#![deny (missing_docs)]
// The crate is `no_std` unless the `runtime-dispatch-simd` feature is enabled;
// the code gated on that feature uses `is_x86_feature_detected!`, which is a
// `std` macro.
#![cfg_attr (not(feature = "runtime-dispatch-simd" ), no_std)]

// `mem` (used below for `mem::size_of`) is taken from `core` in `no_std`
// builds and from `std` otherwise, mirroring the `no_std` switch above.
#[cfg (not(feature = "runtime-dispatch-simd" ))]
use core::mem;
#[cfg (feature = "runtime-dispatch-simd" )]
use std::mem;

// Naive implementations; everything public in `naive` is re-exported from the
// crate root.
mod naive;
pub use naive::*;
// Private module providing `chunk_count` / `chunk_num_chars`, which the public
// functions call once the input is at least `size_of::<usize>()` bytes long.
mod integer_simd;

// The `simd` module is compiled when at least one of these holds:
//   * `runtime-dispatch-simd` is enabled and the target is x86 / x86_64,
//   * the target is aarch64 (no feature required),
//   * `generic-simd` is enabled.
#[cfg (any(
all(
feature = "runtime-dispatch-simd" ,
any(target_arch = "x86" , target_arch = "x86_64" )
),
target_arch = "aarch64" ,
feature = "generic-simd"
))]
mod simd;
55 | |
56 | /// Count occurrences of a byte in a slice of bytes, fast |
57 | /// |
58 | /// # Examples |
59 | /// |
60 | /// ``` |
61 | /// let s = b"This is a Text with spaces" ; |
62 | /// let number_of_spaces = bytecount::count(s, b' ' ); |
63 | /// assert_eq!(number_of_spaces, 5); |
64 | /// ``` |
65 | pub fn count(haystack: &[u8], needle: u8) -> usize { |
66 | if haystack.len() >= 32 { |
67 | #[cfg (all(feature = "runtime-dispatch-simd" , target_arch = "x86_64" ))] |
68 | { |
69 | if is_x86_feature_detected!("avx2" ) { |
70 | unsafe { |
71 | return simd::x86_avx2::chunk_count(haystack, needle); |
72 | } |
73 | } |
74 | } |
75 | |
76 | #[cfg (feature = "generic-simd" )] |
77 | return simd::generic::chunk_count(haystack, needle); |
78 | } |
79 | |
80 | if haystack.len() >= 16 { |
81 | #[cfg (all( |
82 | feature = "runtime-dispatch-simd" , |
83 | any(target_arch = "x86" , target_arch = "x86_64" ), |
84 | not(feature = "generic-simd" ) |
85 | ))] |
86 | { |
87 | if is_x86_feature_detected!("sse2" ) { |
88 | unsafe { |
89 | return simd::x86_sse2::chunk_count(haystack, needle); |
90 | } |
91 | } |
92 | } |
93 | #[cfg (all(target_arch = "aarch64" , not(feature = "generic_simd" )))] |
94 | { |
95 | unsafe { |
96 | return simd::aarch64::chunk_count(haystack, needle); |
97 | } |
98 | } |
99 | } |
100 | |
101 | if haystack.len() >= mem::size_of::<usize>() { |
102 | return integer_simd::chunk_count(haystack, needle); |
103 | } |
104 | |
105 | naive_count(haystack, needle) |
106 | } |
107 | |
108 | /// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast |
109 | /// |
110 | /// This function is safe to use on any byte array, valid UTF-8 or not, |
111 | /// but the output is only meaningful for well-formed UTF-8. |
112 | /// |
113 | /// # Example |
114 | /// |
115 | /// ``` |
116 | /// let swordfish = "メカジキ" ; |
117 | /// let char_count = bytecount::num_chars(swordfish.as_bytes()); |
118 | /// assert_eq!(char_count, 4); |
119 | /// ``` |
120 | pub fn num_chars(utf8_chars: &[u8]) -> usize { |
121 | if utf8_chars.len() >= 32 { |
122 | #[cfg (all(feature = "runtime-dispatch-simd" , target_arch = "x86_64" ))] |
123 | { |
124 | if is_x86_feature_detected!("avx2" ) { |
125 | unsafe { |
126 | return simd::x86_avx2::chunk_num_chars(utf8_chars); |
127 | } |
128 | } |
129 | } |
130 | |
131 | #[cfg (feature = "generic-simd" )] |
132 | return simd::generic::chunk_num_chars(utf8_chars); |
133 | } |
134 | |
135 | if utf8_chars.len() >= 16 { |
136 | #[cfg (all( |
137 | feature = "runtime-dispatch-simd" , |
138 | any(target_arch = "x86" , target_arch = "x86_64" ), |
139 | not(feature = "generic-simd" ) |
140 | ))] |
141 | { |
142 | if is_x86_feature_detected!("sse2" ) { |
143 | unsafe { |
144 | return simd::x86_sse2::chunk_num_chars(utf8_chars); |
145 | } |
146 | } |
147 | } |
148 | #[cfg (all(target_arch = "aarch64" , not(feature = "generic_simd" )))] |
149 | { |
150 | unsafe { |
151 | return simd::aarch64::chunk_num_chars(utf8_chars); |
152 | } |
153 | } |
154 | } |
155 | |
156 | if utf8_chars.len() >= mem::size_of::<usize>() { |
157 | return integer_simd::chunk_num_chars(utf8_chars); |
158 | } |
159 | |
160 | naive_num_chars(utf8_chars) |
161 | } |
162 | |