1 | /*! |
2 | Wrapper routines for `memchr` and friends. |
3 | |
4 | These routines efficiently dispatch to the best implementation based on what |
5 | the CPU supports. |
6 | */ |
7 | |
8 | /// Provides a way to run a memchr-like function while amortizing the cost of |
9 | /// runtime CPU feature detection. |
10 | /// |
11 | /// This works by loading a function pointer from an atomic global. Initially, |
12 | /// this global is set to a function that does CPU feature detection. For |
13 | /// example, if AVX2 is enabled, then the AVX2 implementation is used. |
14 | /// Otherwise, at least on x86_64, the SSE2 implementation is used. (And |
15 | /// in some niche cases, if SSE2 isn't available, then the architecture |
16 | /// independent fallback implementation is used.) |
17 | /// |
18 | /// After the first call to this function, the atomic global is replaced with |
19 | /// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then |
20 | /// will directly call the chosen routine instead of needing to go through the |
21 | /// CPU feature detection branching again. |
22 | /// |
23 | /// This particular macro is specifically written to provide the implementation |
24 | /// of functions with the following signature: |
25 | /// |
26 | /// ```ignore |
27 | /// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>; |
28 | /// ``` |
29 | /// |
30 | /// Where you can also have `memchr2` and `memchr3`, but with `needle2` and |
31 | /// `needle3`, respectively. The `start` and `end` parameters correspond to the |
32 | /// start and end of the haystack, respectively. |
33 | /// |
34 | /// We use raw pointers here instead of the more obvious `haystack: &[u8]` so |
35 | /// that the function is compatible with our lower level iterator logic that |
36 | /// operates on raw pointers. We use this macro to implement "raw" memchr |
37 | /// routines with the signature above, and then define memchr routines using |
38 | /// regular slices on top of them. |
39 | /// |
40 | /// Note that we use `#[cfg(target_feature = "sse2")]` below even though |
41 | /// it shouldn't be strictly necessary because without it, it seems to |
42 | /// cause the compiler to blow up. I guess it can't handle a function |
43 | /// pointer being created with a sse target feature? Dunno. See the |
44 | /// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with |
45 | /// this. |
46 | /// |
47 | /// # Safety |
48 | /// |
49 | /// Primarily callers must that `$fnty` is a correct function pointer type and |
50 | /// not something else. |
51 | /// |
52 | /// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a |
53 | /// routine that returns a valid function pointer when a match is found. That |
54 | /// is, a pointer that is `>= start` and `< end`. |
55 | /// |
56 | /// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers |
57 | /// correspond to valid pointers. |
58 | macro_rules! unsafe_ifunc { |
59 | ( |
60 | $memchrty:ident, |
61 | $memchrfind:ident, |
62 | $fnty:ty, |
63 | $retty:ty, |
64 | $hay_start:ident, |
65 | $hay_end:ident, |
66 | $($needle:ident),+ |
67 | ) => {{ |
68 | #![allow(unused_unsafe)] |
69 | |
70 | use core::sync::atomic::{AtomicPtr, Ordering}; |
71 | |
72 | type Fn = *mut (); |
73 | type RealFn = $fnty; |
74 | static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn); |
75 | |
76 | #[cfg(target_feature = "sse2" )] |
77 | #[target_feature(enable = "sse2" , enable = "avx2" )] |
78 | unsafe fn find_avx2( |
79 | $($needle: u8),+, |
80 | $hay_start: *const u8, |
81 | $hay_end: *const u8, |
82 | ) -> $retty { |
83 | use crate::arch::x86_64::avx2::memchr::$memchrty; |
84 | $memchrty::new_unchecked($($needle),+) |
85 | .$memchrfind($hay_start, $hay_end) |
86 | } |
87 | |
88 | #[cfg(target_feature = "sse2" )] |
89 | #[target_feature(enable = "sse2" )] |
90 | unsafe fn find_sse2( |
91 | $($needle: u8),+, |
92 | $hay_start: *const u8, |
93 | $hay_end: *const u8, |
94 | ) -> $retty { |
95 | use crate::arch::x86_64::sse2::memchr::$memchrty; |
96 | $memchrty::new_unchecked($($needle),+) |
97 | .$memchrfind($hay_start, $hay_end) |
98 | } |
99 | |
100 | unsafe fn find_fallback( |
101 | $($needle: u8),+, |
102 | $hay_start: *const u8, |
103 | $hay_end: *const u8, |
104 | ) -> $retty { |
105 | use crate::arch::all::memchr::$memchrty; |
106 | $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end) |
107 | } |
108 | |
109 | unsafe fn detect( |
110 | $($needle: u8),+, |
111 | $hay_start: *const u8, |
112 | $hay_end: *const u8, |
113 | ) -> $retty { |
114 | let fun = { |
115 | #[cfg(not(target_feature = "sse2" ))] |
116 | { |
117 | debug!( |
118 | "no sse2 feature available, using fallback for {}" , |
119 | stringify!($memchrty), |
120 | ); |
121 | find_fallback as RealFn |
122 | } |
123 | #[cfg(target_feature = "sse2" )] |
124 | { |
125 | use crate::arch::x86_64::{sse2, avx2}; |
126 | if avx2::memchr::$memchrty::is_available() { |
127 | debug!("chose AVX2 for {}" , stringify!($memchrty)); |
128 | find_avx2 as RealFn |
129 | } else if sse2::memchr::$memchrty::is_available() { |
130 | debug!("chose SSE2 for {}" , stringify!($memchrty)); |
131 | find_sse2 as RealFn |
132 | } else { |
133 | debug!("chose fallback for {}" , stringify!($memchrty)); |
134 | find_fallback as RealFn |
135 | } |
136 | } |
137 | }; |
138 | FN.store(fun as Fn, Ordering::Relaxed); |
139 | // SAFETY: The only thing we need to uphold here is the |
140 | // `#[target_feature]` requirements. Since we check is_available |
141 | // above before using the corresponding implementation, we are |
142 | // guaranteed to only call code that is supported on the current |
143 | // CPU. |
144 | fun($($needle),+, $hay_start, $hay_end) |
145 | } |
146 | |
147 | // SAFETY: By virtue of the caller contract, RealFn is a function |
148 | // pointer, which is always safe to transmute with a *mut (). Also, |
149 | // since we use $memchrty::is_available, it is guaranteed to be safe |
150 | // to call $memchrty::$memchrfind. |
151 | unsafe { |
152 | let fun = FN.load(Ordering::Relaxed); |
153 | core::mem::transmute::<Fn, RealFn>(fun)( |
154 | $($needle),+, |
155 | $hay_start, |
156 | $hay_end, |
157 | ) |
158 | } |
159 | }}; |
160 | } |
161 | |
162 | // The routines below dispatch to AVX2, SSE2 or a fallback routine based on |
163 | // what's available in the current environment. The secret sauce here is that |
164 | // we only check for which one to use approximately once, and then "cache" that |
165 | // choice into a global function pointer. Subsequent invocations then just call |
166 | // the appropriate function directly. |
167 | |
168 | /// memchr, but using raw pointers to represent the haystack. |
169 | /// |
170 | /// # Safety |
171 | /// |
172 | /// Pointers must be valid. See `One::find_raw`. |
173 | #[inline (always)] |
174 | pub(crate) fn memchr_raw( |
175 | n1: u8, |
176 | start: *const u8, |
177 | end: *const u8, |
178 | ) -> Option<*const u8> { |
179 | // SAFETY: We provide a valid function pointer type. |
180 | unsafe_ifunc!( |
181 | One, |
182 | find_raw, |
183 | unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, |
184 | Option<*const u8>, |
185 | start, |
186 | end, |
187 | n1 |
188 | ) |
189 | } |
190 | |
191 | /// memrchr, but using raw pointers to represent the haystack. |
192 | /// |
193 | /// # Safety |
194 | /// |
195 | /// Pointers must be valid. See `One::rfind_raw`. |
196 | #[inline (always)] |
197 | pub(crate) fn memrchr_raw( |
198 | n1: u8, |
199 | start: *const u8, |
200 | end: *const u8, |
201 | ) -> Option<*const u8> { |
202 | // SAFETY: We provide a valid function pointer type. |
203 | unsafe_ifunc!( |
204 | One, |
205 | rfind_raw, |
206 | unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, |
207 | Option<*const u8>, |
208 | start, |
209 | end, |
210 | n1 |
211 | ) |
212 | } |
213 | |
214 | /// memchr2, but using raw pointers to represent the haystack. |
215 | /// |
216 | /// # Safety |
217 | /// |
218 | /// Pointers must be valid. See `Two::find_raw`. |
219 | #[inline (always)] |
220 | pub(crate) fn memchr2_raw( |
221 | n1: u8, |
222 | n2: u8, |
223 | start: *const u8, |
224 | end: *const u8, |
225 | ) -> Option<*const u8> { |
226 | // SAFETY: We provide a valid function pointer type. |
227 | unsafe_ifunc!( |
228 | Two, |
229 | find_raw, |
230 | unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, |
231 | Option<*const u8>, |
232 | start, |
233 | end, |
234 | n1, |
235 | n2 |
236 | ) |
237 | } |
238 | |
239 | /// memrchr2, but using raw pointers to represent the haystack. |
240 | /// |
241 | /// # Safety |
242 | /// |
243 | /// Pointers must be valid. See `Two::rfind_raw`. |
244 | #[inline (always)] |
245 | pub(crate) fn memrchr2_raw( |
246 | n1: u8, |
247 | n2: u8, |
248 | start: *const u8, |
249 | end: *const u8, |
250 | ) -> Option<*const u8> { |
251 | // SAFETY: We provide a valid function pointer type. |
252 | unsafe_ifunc!( |
253 | Two, |
254 | rfind_raw, |
255 | unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, |
256 | Option<*const u8>, |
257 | start, |
258 | end, |
259 | n1, |
260 | n2 |
261 | ) |
262 | } |
263 | |
264 | /// memchr3, but using raw pointers to represent the haystack. |
265 | /// |
266 | /// # Safety |
267 | /// |
268 | /// Pointers must be valid. See `Three::find_raw`. |
269 | #[inline (always)] |
270 | pub(crate) fn memchr3_raw( |
271 | n1: u8, |
272 | n2: u8, |
273 | n3: u8, |
274 | start: *const u8, |
275 | end: *const u8, |
276 | ) -> Option<*const u8> { |
277 | // SAFETY: We provide a valid function pointer type. |
278 | unsafe_ifunc!( |
279 | Three, |
280 | find_raw, |
281 | unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, |
282 | Option<*const u8>, |
283 | start, |
284 | end, |
285 | n1, |
286 | n2, |
287 | n3 |
288 | ) |
289 | } |
290 | |
291 | /// memrchr3, but using raw pointers to represent the haystack. |
292 | /// |
293 | /// # Safety |
294 | /// |
295 | /// Pointers must be valid. See `Three::rfind_raw`. |
296 | #[inline (always)] |
297 | pub(crate) fn memrchr3_raw( |
298 | n1: u8, |
299 | n2: u8, |
300 | n3: u8, |
301 | start: *const u8, |
302 | end: *const u8, |
303 | ) -> Option<*const u8> { |
304 | // SAFETY: We provide a valid function pointer type. |
305 | unsafe_ifunc!( |
306 | Three, |
307 | rfind_raw, |
308 | unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, |
309 | Option<*const u8>, |
310 | start, |
311 | end, |
312 | n1, |
313 | n2, |
314 | n3 |
315 | ) |
316 | } |
317 | |
318 | /// Count all matching bytes, but using raw pointers to represent the haystack. |
319 | /// |
320 | /// # Safety |
321 | /// |
322 | /// Pointers must be valid. See `One::count_raw`. |
323 | #[inline (always)] |
324 | pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize { |
325 | // SAFETY: We provide a valid function pointer type. |
326 | unsafe_ifunc!( |
327 | One, |
328 | count_raw, |
329 | unsafe fn(u8, *const u8, *const u8) -> usize, |
330 | usize, |
331 | start, |
332 | end, |
333 | n1 |
334 | ) |
335 | } |
336 | |