1/*!
2Wrapper routines for `memchr` and friends.
3
4These routines efficiently dispatch to the best implementation based on what
5the CPU supports.
6*/
7
8/// Provides a way to run a memchr-like function while amortizing the cost of
9/// runtime CPU feature detection.
10///
11/// This works by loading a function pointer from an atomic global. Initially,
12/// this global is set to a function that does CPU feature detection. For
13/// example, if AVX2 is enabled, then the AVX2 implementation is used.
14/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And
15/// in some niche cases, if SSE2 isn't available, then the architecture
16/// independent fallback implementation is used.)
17///
18/// After the first call to this function, the atomic global is replaced with
19/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then
20/// will directly call the chosen routine instead of needing to go through the
21/// CPU feature detection branching again.
22///
23/// This particular macro is specifically written to provide the implementation
24/// of functions with the following signature:
25///
26/// ```ignore
27/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>;
28/// ```
29///
30/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and
31/// `needle3`, respectively. The `start` and `end` parameters correspond to the
32/// start and end of the haystack, respectively.
33///
34/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so
35/// that the function is compatible with our lower level iterator logic that
36/// operates on raw pointers. We use this macro to implement "raw" memchr
37/// routines with the signature above, and then define memchr routines using
38/// regular slices on top of them.
39///
40/// Note that we use `#[cfg(target_feature = "sse2")]` below even though
41/// it shouldn't be strictly necessary because without it, it seems to
42/// cause the compiler to blow up. I guess it can't handle a function
43/// pointer being created with a sse target feature? Dunno. See the
44/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with
45/// this.
46///
47/// # Safety
48///
49/// Primarily callers must that `$fnty` is a correct function pointer type and
50/// not something else.
51///
52/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a
53/// routine that returns a valid function pointer when a match is found. That
54/// is, a pointer that is `>= start` and `< end`.
55///
56/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers
57/// correspond to valid pointers.
58macro_rules! unsafe_ifunc {
59 (
60 $memchrty:ident,
61 $memchrfind:ident,
62 $fnty:ty,
63 $retty:ty,
64 $hay_start:ident,
65 $hay_end:ident,
66 $($needle:ident),+
67 ) => {{
68 #![allow(unused_unsafe)]
69
70 use core::sync::atomic::{AtomicPtr, Ordering};
71
72 type Fn = *mut ();
73 type RealFn = $fnty;
74 static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);
75
76 #[cfg(target_feature = "sse2")]
77 #[target_feature(enable = "sse2", enable = "avx2")]
78 unsafe fn find_avx2(
79 $($needle: u8),+,
80 $hay_start: *const u8,
81 $hay_end: *const u8,
82 ) -> $retty {
83 use crate::arch::x86_64::avx2::memchr::$memchrty;
84 $memchrty::new_unchecked($($needle),+)
85 .$memchrfind($hay_start, $hay_end)
86 }
87
88 #[cfg(target_feature = "sse2")]
89 #[target_feature(enable = "sse2")]
90 unsafe fn find_sse2(
91 $($needle: u8),+,
92 $hay_start: *const u8,
93 $hay_end: *const u8,
94 ) -> $retty {
95 use crate::arch::x86_64::sse2::memchr::$memchrty;
96 $memchrty::new_unchecked($($needle),+)
97 .$memchrfind($hay_start, $hay_end)
98 }
99
100 unsafe fn find_fallback(
101 $($needle: u8),+,
102 $hay_start: *const u8,
103 $hay_end: *const u8,
104 ) -> $retty {
105 use crate::arch::all::memchr::$memchrty;
106 $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
107 }
108
109 unsafe fn detect(
110 $($needle: u8),+,
111 $hay_start: *const u8,
112 $hay_end: *const u8,
113 ) -> $retty {
114 let fun = {
115 #[cfg(not(target_feature = "sse2"))]
116 {
117 debug!(
118 "no sse2 feature available, using fallback for {}",
119 stringify!($memchrty),
120 );
121 find_fallback as RealFn
122 }
123 #[cfg(target_feature = "sse2")]
124 {
125 use crate::arch::x86_64::{sse2, avx2};
126 if avx2::memchr::$memchrty::is_available() {
127 debug!("chose AVX2 for {}", stringify!($memchrty));
128 find_avx2 as RealFn
129 } else if sse2::memchr::$memchrty::is_available() {
130 debug!("chose SSE2 for {}", stringify!($memchrty));
131 find_sse2 as RealFn
132 } else {
133 debug!("chose fallback for {}", stringify!($memchrty));
134 find_fallback as RealFn
135 }
136 }
137 };
138 FN.store(fun as Fn, Ordering::Relaxed);
139 // SAFETY: The only thing we need to uphold here is the
140 // `#[target_feature]` requirements. Since we check is_available
141 // above before using the corresponding implementation, we are
142 // guaranteed to only call code that is supported on the current
143 // CPU.
144 fun($($needle),+, $hay_start, $hay_end)
145 }
146
147 // SAFETY: By virtue of the caller contract, RealFn is a function
148 // pointer, which is always safe to transmute with a *mut (). Also,
149 // since we use $memchrty::is_available, it is guaranteed to be safe
150 // to call $memchrty::$memchrfind.
151 unsafe {
152 let fun = FN.load(Ordering::Relaxed);
153 core::mem::transmute::<Fn, RealFn>(fun)(
154 $($needle),+,
155 $hay_start,
156 $hay_end,
157 )
158 }
159 }};
160}
161
162// The routines below dispatch to AVX2, SSE2 or a fallback routine based on
163// what's available in the current environment. The secret sauce here is that
164// we only check for which one to use approximately once, and then "cache" that
165// choice into a global function pointer. Subsequent invocations then just call
166// the appropriate function directly.
167
168/// memchr, but using raw pointers to represent the haystack.
169///
170/// # Safety
171///
172/// Pointers must be valid. See `One::find_raw`.
173#[inline(always)]
174pub(crate) fn memchr_raw(
175 n1: u8,
176 start: *const u8,
177 end: *const u8,
178) -> Option<*const u8> {
179 // SAFETY: We provide a valid function pointer type.
180 unsafe_ifunc!(
181 One,
182 find_raw,
183 unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
184 Option<*const u8>,
185 start,
186 end,
187 n1
188 )
189}
190
191/// memrchr, but using raw pointers to represent the haystack.
192///
193/// # Safety
194///
195/// Pointers must be valid. See `One::rfind_raw`.
196#[inline(always)]
197pub(crate) fn memrchr_raw(
198 n1: u8,
199 start: *const u8,
200 end: *const u8,
201) -> Option<*const u8> {
202 // SAFETY: We provide a valid function pointer type.
203 unsafe_ifunc!(
204 One,
205 rfind_raw,
206 unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
207 Option<*const u8>,
208 start,
209 end,
210 n1
211 )
212}
213
214/// memchr2, but using raw pointers to represent the haystack.
215///
216/// # Safety
217///
218/// Pointers must be valid. See `Two::find_raw`.
219#[inline(always)]
220pub(crate) fn memchr2_raw(
221 n1: u8,
222 n2: u8,
223 start: *const u8,
224 end: *const u8,
225) -> Option<*const u8> {
226 // SAFETY: We provide a valid function pointer type.
227 unsafe_ifunc!(
228 Two,
229 find_raw,
230 unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
231 Option<*const u8>,
232 start,
233 end,
234 n1,
235 n2
236 )
237}
238
239/// memrchr2, but using raw pointers to represent the haystack.
240///
241/// # Safety
242///
243/// Pointers must be valid. See `Two::rfind_raw`.
244#[inline(always)]
245pub(crate) fn memrchr2_raw(
246 n1: u8,
247 n2: u8,
248 start: *const u8,
249 end: *const u8,
250) -> Option<*const u8> {
251 // SAFETY: We provide a valid function pointer type.
252 unsafe_ifunc!(
253 Two,
254 rfind_raw,
255 unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
256 Option<*const u8>,
257 start,
258 end,
259 n1,
260 n2
261 )
262}
263
264/// memchr3, but using raw pointers to represent the haystack.
265///
266/// # Safety
267///
268/// Pointers must be valid. See `Three::find_raw`.
269#[inline(always)]
270pub(crate) fn memchr3_raw(
271 n1: u8,
272 n2: u8,
273 n3: u8,
274 start: *const u8,
275 end: *const u8,
276) -> Option<*const u8> {
277 // SAFETY: We provide a valid function pointer type.
278 unsafe_ifunc!(
279 Three,
280 find_raw,
281 unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
282 Option<*const u8>,
283 start,
284 end,
285 n1,
286 n2,
287 n3
288 )
289}
290
291/// memrchr3, but using raw pointers to represent the haystack.
292///
293/// # Safety
294///
295/// Pointers must be valid. See `Three::rfind_raw`.
296#[inline(always)]
297pub(crate) fn memrchr3_raw(
298 n1: u8,
299 n2: u8,
300 n3: u8,
301 start: *const u8,
302 end: *const u8,
303) -> Option<*const u8> {
304 // SAFETY: We provide a valid function pointer type.
305 unsafe_ifunc!(
306 Three,
307 rfind_raw,
308 unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
309 Option<*const u8>,
310 start,
311 end,
312 n1,
313 n2,
314 n3
315 )
316}
317
318/// Count all matching bytes, but using raw pointers to represent the haystack.
319///
320/// # Safety
321///
322/// Pointers must be valid. See `One::count_raw`.
323#[inline(always)]
324pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize {
325 // SAFETY: We provide a valid function pointer type.
326 unsafe_ifunc!(
327 One,
328 count_raw,
329 unsafe fn(u8, *const u8, *const u8) -> usize,
330 usize,
331 start,
332 end,
333 n1
334 )
335}
336