1 | /*! |
2 | This module defines 256-bit vector implementations of `memchr` and friends. |
3 | |
4 | The main types in this module are [`One`], [`Two`] and [`Three`]. They are for |
5 | searching for one, two or three distinct bytes, respectively, in a haystack. |
6 | Each type also has corresponding double ended iterators. These searchers are |
7 | typically much faster than scalar routines accomplishing the same task. |
8 | |
9 | The `One` searcher also provides a [`One::count`] routine for efficiently |
10 | counting the number of times a single byte occurs in a haystack. This is |
11 | useful, for example, for counting the number of lines in a haystack. This |
12 | routine exists because it is usually faster, especially with a high match |
13 | count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its |
14 | `Iterator::count` implementation to use this routine.) |
15 | |
16 | Only one, two and three bytes are supported because three bytes is about |
17 | the point where one sees diminishing returns. Beyond this point and it's |
18 | probably (but not necessarily) better to just use a simple `[bool; 256]` array |
19 | or similar. However, it depends mightily on the specific work-load and the |
20 | expected match frequency. |
21 | */ |
22 | |
23 | use core::arch::x86_64::{__m128i, __m256i}; |
24 | |
25 | use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; |
26 | |
27 | /// Finds all occurrences of a single byte in a haystack. |
28 | #[derive(Clone, Copy, Debug)] |
29 | pub struct One { |
30 | /// Used for haystacks less than 32 bytes. |
31 | sse2: generic::One<__m128i>, |
32 | /// Used for haystacks bigger than 32 bytes. |
33 | avx2: generic::One<__m256i>, |
34 | } |
35 | |
36 | impl One { |
37 | /// Create a new searcher that finds occurrences of the needle byte given. |
38 | /// |
39 | /// This particular searcher is specialized to use AVX2 vector instructions |
40 | /// that typically make it quite fast. (SSE2 is used for haystacks that |
41 | /// are too short to accommodate an AVX2 vector.) |
42 | /// |
43 | /// If either SSE2 or AVX2 is unavailable in the current environment, then |
44 | /// `None` is returned. |
45 | #[inline ] |
46 | pub fn new(needle: u8) -> Option<One> { |
47 | if One::is_available() { |
48 | // SAFETY: we check that sse2 and avx2 are available above. |
49 | unsafe { Some(One::new_unchecked(needle)) } |
50 | } else { |
51 | None |
52 | } |
53 | } |
54 | |
55 | /// Create a new finder specific to AVX2 vectors and routines without |
56 | /// checking that either SSE2 or AVX2 is available. |
57 | /// |
58 | /// # Safety |
59 | /// |
60 | /// Callers must guarantee that it is safe to execute both `sse2` and |
61 | /// `avx2` instructions in the current environment. |
62 | /// |
63 | /// Note that it is a common misconception that if one compiles for an |
64 | /// `x86_64` target, then they therefore automatically have access to SSE2 |
65 | /// instructions. While this is almost always the case, it isn't true in |
66 | /// 100% of cases. |
67 | #[target_feature (enable = "sse2" , enable = "avx2" )] |
68 | #[inline ] |
69 | pub unsafe fn new_unchecked(needle: u8) -> One { |
70 | One { |
71 | sse2: generic::One::new(needle), |
72 | avx2: generic::One::new(needle), |
73 | } |
74 | } |
75 | |
76 | /// Returns true when this implementation is available in the current |
77 | /// environment. |
78 | /// |
79 | /// When this is true, it is guaranteed that [`One::new`] will return |
80 | /// a `Some` value. Similarly, when it is false, it is guaranteed that |
81 | /// `One::new` will return a `None` value. |
82 | /// |
83 | /// Note also that for the lifetime of a single program, if this returns |
84 | /// true then it will always return true. |
85 | #[inline ] |
86 | pub fn is_available() -> bool { |
87 | #[cfg (not(target_feature = "sse2" ))] |
88 | { |
89 | false |
90 | } |
91 | #[cfg (target_feature = "sse2" )] |
92 | { |
93 | #[cfg (target_feature = "avx2" )] |
94 | { |
95 | true |
96 | } |
97 | #[cfg (not(target_feature = "avx2" ))] |
98 | { |
99 | #[cfg (feature = "std" )] |
100 | { |
101 | std::is_x86_feature_detected!("avx2" ) |
102 | } |
103 | #[cfg (not(feature = "std" ))] |
104 | { |
105 | false |
106 | } |
107 | } |
108 | } |
109 | } |
110 | |
111 | /// Return the first occurrence of one of the needle bytes in the given |
112 | /// haystack. If no such occurrence exists, then `None` is returned. |
113 | /// |
114 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
115 | /// value is `haystack.len() - 1`. |
116 | #[inline ] |
117 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
118 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
119 | // falls within the bounds of the start and end pointers. |
120 | unsafe { |
121 | generic::search_slice_with_raw(haystack, |s, e| { |
122 | self.find_raw(s, e) |
123 | }) |
124 | } |
125 | } |
126 | |
127 | /// Return the last occurrence of one of the needle bytes in the given |
128 | /// haystack. If no such occurrence exists, then `None` is returned. |
129 | /// |
130 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
131 | /// value is `haystack.len() - 1`. |
132 | #[inline ] |
133 | pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { |
134 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
135 | // falls within the bounds of the start and end pointers. |
136 | unsafe { |
137 | generic::search_slice_with_raw(haystack, |s, e| { |
138 | self.rfind_raw(s, e) |
139 | }) |
140 | } |
141 | } |
142 | |
143 | /// Counts all occurrences of this byte in the given haystack. |
144 | #[inline ] |
145 | pub fn count(&self, haystack: &[u8]) -> usize { |
146 | // SAFETY: All of our pointers are derived directly from a borrowed |
147 | // slice, which is guaranteed to be valid. |
148 | unsafe { |
149 | let start = haystack.as_ptr(); |
150 | let end = start.add(haystack.len()); |
151 | self.count_raw(start, end) |
152 | } |
153 | } |
154 | |
155 | /// Like `find`, but accepts and returns raw pointers. |
156 | /// |
157 | /// When a match is found, the pointer returned is guaranteed to be |
158 | /// `>= start` and `< end`. |
159 | /// |
160 | /// This routine is useful if you're already using raw pointers and would |
161 | /// like to avoid converting back to a slice before executing a search. |
162 | /// |
163 | /// # Safety |
164 | /// |
165 | /// * Both `start` and `end` must be valid for reads. |
166 | /// * Both `start` and `end` must point to an initialized value. |
167 | /// * Both `start` and `end` must point to the same allocated object and |
168 | /// must either be in bounds or at most one byte past the end of the |
169 | /// allocated object. |
170 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
171 | /// object. |
172 | /// * The distance between `start` and `end` must not overflow `isize`. |
173 | /// * The distance being in bounds must not rely on "wrapping around" the |
174 | /// address space. |
175 | /// |
176 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
177 | /// In that case, `None` will always be returned. |
178 | #[inline ] |
179 | pub unsafe fn find_raw( |
180 | &self, |
181 | start: *const u8, |
182 | end: *const u8, |
183 | ) -> Option<*const u8> { |
184 | if start >= end { |
185 | return None; |
186 | } |
187 | let len = end.distance(start); |
188 | if len < __m256i::BYTES { |
189 | return if len < __m128i::BYTES { |
190 | // SAFETY: We require the caller to pass valid start/end |
191 | // pointers. |
192 | generic::fwd_byte_by_byte(start, end, |b| { |
193 | b == self.sse2.needle1() |
194 | }) |
195 | } else { |
196 | // SAFETY: We require the caller to pass valid start/end |
197 | // pointers. |
198 | self.find_raw_sse2(start, end) |
199 | }; |
200 | } |
201 | // SAFETY: Building a `One` means it's safe to call both 'sse2' and |
202 | // 'avx2' routines. Also, we've checked that our haystack is big |
203 | // enough to run on the vector routine. Pointer validity is caller's |
204 | // responsibility. |
205 | // |
206 | // Note that we could call `self.avx2.find_raw` directly here. But that |
207 | // means we'd have to annotate this routine with `target_feature`. |
208 | // Which is fine, because this routine is `unsafe` anyway and the |
209 | // `target_feature` obligation is met by virtue of building a `One`. |
210 | // The real problem is that a routine with a `target_feature` |
211 | // annotation generally can't be inlined into caller code unless |
212 | // the caller code has the same target feature annotations. Namely, |
213 | // the common case (at time of writing) is for calling code to not |
214 | // have the `avx2` target feature enabled *at compile time*. Without |
215 | // `target_feature` on this routine, it can be inlined which will |
216 | // handle some of the short-haystack cases above without touching the |
217 | // architecture specific code. |
218 | self.find_raw_avx2(start, end) |
219 | } |
220 | |
221 | /// Like `rfind`, but accepts and returns raw pointers. |
222 | /// |
223 | /// When a match is found, the pointer returned is guaranteed to be |
224 | /// `>= start` and `< end`. |
225 | /// |
226 | /// This routine is useful if you're already using raw pointers and would |
227 | /// like to avoid converting back to a slice before executing a search. |
228 | /// |
229 | /// # Safety |
230 | /// |
231 | /// * Both `start` and `end` must be valid for reads. |
232 | /// * Both `start` and `end` must point to an initialized value. |
233 | /// * Both `start` and `end` must point to the same allocated object and |
234 | /// must either be in bounds or at most one byte past the end of the |
235 | /// allocated object. |
236 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
237 | /// object. |
238 | /// * The distance between `start` and `end` must not overflow `isize`. |
239 | /// * The distance being in bounds must not rely on "wrapping around" the |
240 | /// address space. |
241 | /// |
242 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
243 | /// In that case, `None` will always be returned. |
244 | #[inline ] |
245 | pub unsafe fn rfind_raw( |
246 | &self, |
247 | start: *const u8, |
248 | end: *const u8, |
249 | ) -> Option<*const u8> { |
250 | if start >= end { |
251 | return None; |
252 | } |
253 | let len = end.distance(start); |
254 | if len < __m256i::BYTES { |
255 | return if len < __m128i::BYTES { |
256 | // SAFETY: We require the caller to pass valid start/end |
257 | // pointers. |
258 | generic::rev_byte_by_byte(start, end, |b| { |
259 | b == self.sse2.needle1() |
260 | }) |
261 | } else { |
262 | // SAFETY: We require the caller to pass valid start/end |
263 | // pointers. |
264 | self.rfind_raw_sse2(start, end) |
265 | }; |
266 | } |
267 | // SAFETY: Building a `One` means it's safe to call both 'sse2' and |
268 | // 'avx2' routines. Also, we've checked that our haystack is big |
269 | // enough to run on the vector routine. Pointer validity is caller's |
270 | // responsibility. |
271 | // |
272 | // See note in forward routine above for why we don't just call |
273 | // `self.avx2.rfind_raw` directly here. |
274 | self.rfind_raw_avx2(start, end) |
275 | } |
276 | |
277 | /// Counts all occurrences of this byte in the given haystack represented |
278 | /// by raw pointers. |
279 | /// |
280 | /// This routine is useful if you're already using raw pointers and would |
281 | /// like to avoid converting back to a slice before executing a search. |
282 | /// |
283 | /// # Safety |
284 | /// |
285 | /// * Both `start` and `end` must be valid for reads. |
286 | /// * Both `start` and `end` must point to an initialized value. |
287 | /// * Both `start` and `end` must point to the same allocated object and |
288 | /// must either be in bounds or at most one byte past the end of the |
289 | /// allocated object. |
290 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
291 | /// object. |
292 | /// * The distance between `start` and `end` must not overflow `isize`. |
293 | /// * The distance being in bounds must not rely on "wrapping around" the |
294 | /// address space. |
295 | /// |
296 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
297 | /// In that case, `0` will always be returned. |
298 | #[inline ] |
299 | pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { |
300 | if start >= end { |
301 | return 0; |
302 | } |
303 | let len = end.distance(start); |
304 | if len < __m256i::BYTES { |
305 | return if len < __m128i::BYTES { |
306 | // SAFETY: We require the caller to pass valid start/end |
307 | // pointers. |
308 | generic::count_byte_by_byte(start, end, |b| { |
309 | b == self.sse2.needle1() |
310 | }) |
311 | } else { |
312 | // SAFETY: We require the caller to pass valid start/end |
313 | // pointers. |
314 | self.count_raw_sse2(start, end) |
315 | }; |
316 | } |
317 | // SAFETY: Building a `One` means it's safe to call both 'sse2' and |
318 | // 'avx2' routines. Also, we've checked that our haystack is big |
319 | // enough to run on the vector routine. Pointer validity is caller's |
320 | // responsibility. |
321 | self.count_raw_avx2(start, end) |
322 | } |
323 | |
324 | /// Execute a search using SSE2 vectors and routines. |
325 | /// |
326 | /// # Safety |
327 | /// |
328 | /// Same as [`One::find_raw`], except the distance between `start` and |
329 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
330 | /// |
331 | /// (The target feature safety obligation is automatically fulfilled by |
332 | /// virtue of being a method on `One`, which can only be constructed |
333 | /// when it is safe to call `sse2`/`avx2` routines.) |
334 | #[target_feature (enable = "sse2" )] |
335 | #[inline ] |
336 | unsafe fn find_raw_sse2( |
337 | &self, |
338 | start: *const u8, |
339 | end: *const u8, |
340 | ) -> Option<*const u8> { |
341 | self.sse2.find_raw(start, end) |
342 | } |
343 | |
344 | /// Execute a search using SSE2 vectors and routines. |
345 | /// |
346 | /// # Safety |
347 | /// |
348 | /// Same as [`One::rfind_raw`], except the distance between `start` and |
349 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
350 | /// |
351 | /// (The target feature safety obligation is automatically fulfilled by |
352 | /// virtue of being a method on `One`, which can only be constructed |
353 | /// when it is safe to call `sse2`/`avx2` routines.) |
354 | #[target_feature (enable = "sse2" )] |
355 | #[inline ] |
356 | unsafe fn rfind_raw_sse2( |
357 | &self, |
358 | start: *const u8, |
359 | end: *const u8, |
360 | ) -> Option<*const u8> { |
361 | self.sse2.rfind_raw(start, end) |
362 | } |
363 | |
364 | /// Execute a count using SSE2 vectors and routines. |
365 | /// |
366 | /// # Safety |
367 | /// |
368 | /// Same as [`One::count_raw`], except the distance between `start` and |
369 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
370 | /// |
371 | /// (The target feature safety obligation is automatically fulfilled by |
372 | /// virtue of being a method on `One`, which can only be constructed |
373 | /// when it is safe to call `sse2`/`avx2` routines.) |
374 | #[target_feature (enable = "sse2" )] |
375 | #[inline ] |
376 | unsafe fn count_raw_sse2( |
377 | &self, |
378 | start: *const u8, |
379 | end: *const u8, |
380 | ) -> usize { |
381 | self.sse2.count_raw(start, end) |
382 | } |
383 | |
384 | /// Execute a search using AVX2 vectors and routines. |
385 | /// |
386 | /// # Safety |
387 | /// |
388 | /// Same as [`One::find_raw`], except the distance between `start` and |
389 | /// `end` must be at least the size of an AVX2 vector (in bytes). |
390 | /// |
391 | /// (The target feature safety obligation is automatically fulfilled by |
392 | /// virtue of being a method on `One`, which can only be constructed |
393 | /// when it is safe to call `sse2`/`avx2` routines.) |
394 | #[target_feature (enable = "avx2" )] |
395 | #[inline ] |
396 | unsafe fn find_raw_avx2( |
397 | &self, |
398 | start: *const u8, |
399 | end: *const u8, |
400 | ) -> Option<*const u8> { |
401 | self.avx2.find_raw(start, end) |
402 | } |
403 | |
404 | /// Execute a search using AVX2 vectors and routines. |
405 | /// |
406 | /// # Safety |
407 | /// |
408 | /// Same as [`One::rfind_raw`], except the distance between `start` and |
409 | /// `end` must be at least the size of an AVX2 vector (in bytes). |
410 | /// |
411 | /// (The target feature safety obligation is automatically fulfilled by |
412 | /// virtue of being a method on `One`, which can only be constructed |
413 | /// when it is safe to call `sse2`/`avx2` routines.) |
414 | #[target_feature (enable = "avx2" )] |
415 | #[inline ] |
416 | unsafe fn rfind_raw_avx2( |
417 | &self, |
418 | start: *const u8, |
419 | end: *const u8, |
420 | ) -> Option<*const u8> { |
421 | self.avx2.rfind_raw(start, end) |
422 | } |
423 | |
424 | /// Execute a count using AVX2 vectors and routines. |
425 | /// |
426 | /// # Safety |
427 | /// |
428 | /// Same as [`One::count_raw`], except the distance between `start` and |
429 | /// `end` must be at least the size of an AVX2 vector (in bytes). |
430 | /// |
431 | /// (The target feature safety obligation is automatically fulfilled by |
432 | /// virtue of being a method on `One`, which can only be constructed |
433 | /// when it is safe to call `sse2`/`avx2` routines.) |
434 | #[target_feature (enable = "avx2" )] |
435 | #[inline ] |
436 | unsafe fn count_raw_avx2( |
437 | &self, |
438 | start: *const u8, |
439 | end: *const u8, |
440 | ) -> usize { |
441 | self.avx2.count_raw(start, end) |
442 | } |
443 | |
444 | /// Returns an iterator over all occurrences of the needle byte in the |
445 | /// given haystack. |
446 | /// |
447 | /// The iterator returned implements `DoubleEndedIterator`. This means it |
448 | /// can also be used to find occurrences in reverse order. |
449 | #[inline ] |
450 | pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { |
451 | OneIter { searcher: self, it: generic::Iter::new(haystack) } |
452 | } |
453 | } |
454 | |
455 | /// An iterator over all occurrences of a single byte in a haystack. |
456 | /// |
457 | /// This iterator implements `DoubleEndedIterator`, which means it can also be |
458 | /// used to find occurrences in reverse order. |
459 | /// |
460 | /// This iterator is created by the [`One::iter`] method. |
461 | /// |
462 | /// The lifetime parameters are as follows: |
463 | /// |
464 | /// * `'a` refers to the lifetime of the underlying [`One`] searcher. |
465 | /// * `'h` refers to the lifetime of the haystack being searched. |
466 | #[derive(Clone, Debug)] |
467 | pub struct OneIter<'a, 'h> { |
468 | searcher: &'a One, |
469 | it: generic::Iter<'h>, |
470 | } |
471 | |
472 | impl<'a, 'h> Iterator for OneIter<'a, 'h> { |
473 | type Item = usize; |
474 | |
475 | #[inline ] |
476 | fn next(&mut self) -> Option<usize> { |
477 | // SAFETY: We rely on the generic iterator to provide valid start |
478 | // and end pointers, but we guarantee that any pointer returned by |
479 | // 'find_raw' falls within the bounds of the start and end pointer. |
480 | unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } |
481 | } |
482 | |
483 | #[inline ] |
484 | fn count(self) -> usize { |
485 | self.it.count(|s, e| { |
486 | // SAFETY: We rely on our generic iterator to return valid start |
487 | // and end pointers. |
488 | unsafe { self.searcher.count_raw(s, e) } |
489 | }) |
490 | } |
491 | |
492 | #[inline ] |
493 | fn size_hint(&self) -> (usize, Option<usize>) { |
494 | self.it.size_hint() |
495 | } |
496 | } |
497 | |
498 | impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { |
499 | #[inline ] |
500 | fn next_back(&mut self) -> Option<usize> { |
501 | // SAFETY: We rely on the generic iterator to provide valid start |
502 | // and end pointers, but we guarantee that any pointer returned by |
503 | // 'rfind_raw' falls within the bounds of the start and end pointer. |
504 | unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } |
505 | } |
506 | } |
507 | |
508 | impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} |
509 | |
510 | /// Finds all occurrences of two bytes in a haystack. |
511 | /// |
512 | /// That is, this reports matches of one of two possible bytes. For example, |
513 | /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, |
514 | /// `4` and `5`. |
515 | #[derive(Clone, Copy, Debug)] |
516 | pub struct Two { |
517 | /// Used for haystacks less than 32 bytes. |
518 | sse2: generic::Two<__m128i>, |
519 | /// Used for haystacks bigger than 32 bytes. |
520 | avx2: generic::Two<__m256i>, |
521 | } |
522 | |
523 | impl Two { |
524 | /// Create a new searcher that finds occurrences of the needle bytes given. |
525 | /// |
526 | /// This particular searcher is specialized to use AVX2 vector instructions |
527 | /// that typically make it quite fast. (SSE2 is used for haystacks that |
528 | /// are too short to accommodate an AVX2 vector.) |
529 | /// |
530 | /// If either SSE2 or AVX2 is unavailable in the current environment, then |
531 | /// `None` is returned. |
532 | #[inline ] |
533 | pub fn new(needle1: u8, needle2: u8) -> Option<Two> { |
534 | if Two::is_available() { |
535 | // SAFETY: we check that sse2 and avx2 are available above. |
536 | unsafe { Some(Two::new_unchecked(needle1, needle2)) } |
537 | } else { |
538 | None |
539 | } |
540 | } |
541 | |
542 | /// Create a new finder specific to AVX2 vectors and routines without |
543 | /// checking that either SSE2 or AVX2 is available. |
544 | /// |
545 | /// # Safety |
546 | /// |
547 | /// Callers must guarantee that it is safe to execute both `sse2` and |
548 | /// `avx2` instructions in the current environment. |
549 | /// |
550 | /// Note that it is a common misconception that if one compiles for an |
551 | /// `x86_64` target, then they therefore automatically have access to SSE2 |
552 | /// instructions. While this is almost always the case, it isn't true in |
553 | /// 100% of cases. |
554 | #[target_feature (enable = "sse2" , enable = "avx2" )] |
555 | #[inline ] |
556 | pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { |
557 | Two { |
558 | sse2: generic::Two::new(needle1, needle2), |
559 | avx2: generic::Two::new(needle1, needle2), |
560 | } |
561 | } |
562 | |
563 | /// Returns true when this implementation is available in the current |
564 | /// environment. |
565 | /// |
566 | /// When this is true, it is guaranteed that [`Two::new`] will return |
567 | /// a `Some` value. Similarly, when it is false, it is guaranteed that |
568 | /// `Two::new` will return a `None` value. |
569 | /// |
570 | /// Note also that for the lifetime of a single program, if this returns |
571 | /// true then it will always return true. |
572 | #[inline ] |
573 | pub fn is_available() -> bool { |
574 | #[cfg (not(target_feature = "sse2" ))] |
575 | { |
576 | false |
577 | } |
578 | #[cfg (target_feature = "sse2" )] |
579 | { |
580 | #[cfg (target_feature = "avx2" )] |
581 | { |
582 | true |
583 | } |
584 | #[cfg (not(target_feature = "avx2" ))] |
585 | { |
586 | #[cfg (feature = "std" )] |
587 | { |
588 | std::is_x86_feature_detected!("avx2" ) |
589 | } |
590 | #[cfg (not(feature = "std" ))] |
591 | { |
592 | false |
593 | } |
594 | } |
595 | } |
596 | } |
597 | |
598 | /// Return the first occurrence of one of the needle bytes in the given |
599 | /// haystack. If no such occurrence exists, then `None` is returned. |
600 | /// |
601 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
602 | /// value is `haystack.len() - 1`. |
603 | #[inline ] |
604 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
605 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
606 | // falls within the bounds of the start and end pointers. |
607 | unsafe { |
608 | generic::search_slice_with_raw(haystack, |s, e| { |
609 | self.find_raw(s, e) |
610 | }) |
611 | } |
612 | } |
613 | |
614 | /// Return the last occurrence of one of the needle bytes in the given |
615 | /// haystack. If no such occurrence exists, then `None` is returned. |
616 | /// |
617 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
618 | /// value is `haystack.len() - 1`. |
619 | #[inline ] |
620 | pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { |
621 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
622 | // falls within the bounds of the start and end pointers. |
623 | unsafe { |
624 | generic::search_slice_with_raw(haystack, |s, e| { |
625 | self.rfind_raw(s, e) |
626 | }) |
627 | } |
628 | } |
629 | |
630 | /// Like `find`, but accepts and returns raw pointers. |
631 | /// |
632 | /// When a match is found, the pointer returned is guaranteed to be |
633 | /// `>= start` and `< end`. |
634 | /// |
635 | /// This routine is useful if you're already using raw pointers and would |
636 | /// like to avoid converting back to a slice before executing a search. |
637 | /// |
638 | /// # Safety |
639 | /// |
640 | /// * Both `start` and `end` must be valid for reads. |
641 | /// * Both `start` and `end` must point to an initialized value. |
642 | /// * Both `start` and `end` must point to the same allocated object and |
643 | /// must either be in bounds or at most one byte past the end of the |
644 | /// allocated object. |
645 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
646 | /// object. |
647 | /// * The distance between `start` and `end` must not overflow `isize`. |
648 | /// * The distance being in bounds must not rely on "wrapping around" the |
649 | /// address space. |
650 | /// |
651 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
652 | /// In that case, `None` will always be returned. |
653 | #[inline ] |
654 | pub unsafe fn find_raw( |
655 | &self, |
656 | start: *const u8, |
657 | end: *const u8, |
658 | ) -> Option<*const u8> { |
659 | if start >= end { |
660 | return None; |
661 | } |
662 | let len = end.distance(start); |
663 | if len < __m256i::BYTES { |
664 | return if len < __m128i::BYTES { |
665 | // SAFETY: We require the caller to pass valid start/end |
666 | // pointers. |
667 | generic::fwd_byte_by_byte(start, end, |b| { |
668 | b == self.sse2.needle1() || b == self.sse2.needle2() |
669 | }) |
670 | } else { |
671 | // SAFETY: We require the caller to pass valid start/end |
672 | // pointers. |
673 | self.find_raw_sse2(start, end) |
674 | }; |
675 | } |
676 | // SAFETY: Building a `Two` means it's safe to call both 'sse2' and |
677 | // 'avx2' routines. Also, we've checked that our haystack is big |
678 | // enough to run on the vector routine. Pointer validity is caller's |
679 | // responsibility. |
680 | // |
681 | // Note that we could call `self.avx2.find_raw` directly here. But that |
682 | // means we'd have to annotate this routine with `target_feature`. |
683 | // Which is fine, because this routine is `unsafe` anyway and the |
684 | // `target_feature` obligation is met by virtue of building a `Two`. |
685 | // The real problem is that a routine with a `target_feature` |
686 | // annotation generally can't be inlined into caller code unless |
687 | // the caller code has the same target feature annotations. Namely, |
688 | // the common case (at time of writing) is for calling code to not |
689 | // have the `avx2` target feature enabled *at compile time*. Without |
690 | // `target_feature` on this routine, it can be inlined which will |
691 | // handle some of the short-haystack cases above without touching the |
692 | // architecture specific code. |
693 | self.find_raw_avx2(start, end) |
694 | } |
695 | |
696 | /// Like `rfind`, but accepts and returns raw pointers. |
697 | /// |
698 | /// When a match is found, the pointer returned is guaranteed to be |
699 | /// `>= start` and `< end`. |
700 | /// |
701 | /// This routine is useful if you're already using raw pointers and would |
702 | /// like to avoid converting back to a slice before executing a search. |
703 | /// |
704 | /// # Safety |
705 | /// |
706 | /// * Both `start` and `end` must be valid for reads. |
707 | /// * Both `start` and `end` must point to an initialized value. |
708 | /// * Both `start` and `end` must point to the same allocated object and |
709 | /// must either be in bounds or at most one byte past the end of the |
710 | /// allocated object. |
711 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
712 | /// object. |
713 | /// * The distance between `start` and `end` must not overflow `isize`. |
714 | /// * The distance being in bounds must not rely on "wrapping around" the |
715 | /// address space. |
716 | /// |
717 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
718 | /// In that case, `None` will always be returned. |
719 | #[inline ] |
720 | pub unsafe fn rfind_raw( |
721 | &self, |
722 | start: *const u8, |
723 | end: *const u8, |
724 | ) -> Option<*const u8> { |
725 | if start >= end { |
726 | return None; |
727 | } |
728 | let len = end.distance(start); |
729 | if len < __m256i::BYTES { |
730 | return if len < __m128i::BYTES { |
731 | // SAFETY: We require the caller to pass valid start/end |
732 | // pointers. |
733 | generic::rev_byte_by_byte(start, end, |b| { |
734 | b == self.sse2.needle1() || b == self.sse2.needle2() |
735 | }) |
736 | } else { |
737 | // SAFETY: We require the caller to pass valid start/end |
738 | // pointers. |
739 | self.rfind_raw_sse2(start, end) |
740 | }; |
741 | } |
742 | // SAFETY: Building a `Two` means it's safe to call both 'sse2' and |
743 | // 'avx2' routines. Also, we've checked that our haystack is big |
744 | // enough to run on the vector routine. Pointer validity is caller's |
745 | // responsibility. |
746 | // |
747 | // See note in forward routine above for why we don't just call |
748 | // `self.avx2.rfind_raw` directly here. |
749 | self.rfind_raw_avx2(start, end) |
750 | } |
751 | |
752 | /// Execute a search using SSE2 vectors and routines. |
753 | /// |
754 | /// # Safety |
755 | /// |
756 | /// Same as [`Two::find_raw`], except the distance between `start` and |
757 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
758 | /// |
759 | /// (The target feature safety obligation is automatically fulfilled by |
760 | /// virtue of being a method on `Two`, which can only be constructed |
761 | /// when it is safe to call `sse2`/`avx2` routines.) |
762 | #[target_feature (enable = "sse2" )] |
763 | #[inline ] |
764 | unsafe fn find_raw_sse2( |
765 | &self, |
766 | start: *const u8, |
767 | end: *const u8, |
768 | ) -> Option<*const u8> { |
769 | self.sse2.find_raw(start, end) |
770 | } |
771 | |
772 | /// Execute a search using SSE2 vectors and routines. |
773 | /// |
774 | /// # Safety |
775 | /// |
776 | /// Same as [`Two::rfind_raw`], except the distance between `start` and |
777 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
778 | /// |
779 | /// (The target feature safety obligation is automatically fulfilled by |
780 | /// virtue of being a method on `Two`, which can only be constructed |
781 | /// when it is safe to call `sse2`/`avx2` routines.) |
782 | #[target_feature (enable = "sse2" )] |
783 | #[inline ] |
784 | unsafe fn rfind_raw_sse2( |
785 | &self, |
786 | start: *const u8, |
787 | end: *const u8, |
788 | ) -> Option<*const u8> { |
789 | self.sse2.rfind_raw(start, end) |
790 | } |
791 | |
792 | /// Execute a search using AVX2 vectors and routines. |
793 | /// |
794 | /// # Safety |
795 | /// |
796 | /// Same as [`Two::find_raw`], except the distance between `start` and |
797 | /// `end` must be at least the size of an AVX2 vector (in bytes). |
798 | /// |
799 | /// (The target feature safety obligation is automatically fulfilled by |
800 | /// virtue of being a method on `Two`, which can only be constructed |
801 | /// when it is safe to call `sse2`/`avx2` routines.) |
802 | #[target_feature (enable = "avx2" )] |
803 | #[inline ] |
804 | unsafe fn find_raw_avx2( |
805 | &self, |
806 | start: *const u8, |
807 | end: *const u8, |
808 | ) -> Option<*const u8> { |
809 | self.avx2.find_raw(start, end) |
810 | } |
811 | |
812 | /// Execute a search using AVX2 vectors and routines. |
813 | /// |
814 | /// # Safety |
815 | /// |
816 | /// Same as [`Two::rfind_raw`], except the distance between `start` and |
817 | /// `end` must be at least the size of an AVX2 vector (in bytes). |
818 | /// |
819 | /// (The target feature safety obligation is automatically fulfilled by |
820 | /// virtue of being a method on `Two`, which can only be constructed |
821 | /// when it is safe to call `sse2`/`avx2` routines.) |
822 | #[target_feature (enable = "avx2" )] |
823 | #[inline ] |
824 | unsafe fn rfind_raw_avx2( |
825 | &self, |
826 | start: *const u8, |
827 | end: *const u8, |
828 | ) -> Option<*const u8> { |
829 | self.avx2.rfind_raw(start, end) |
830 | } |
831 | |
832 | /// Returns an iterator over all occurrences of the needle bytes in the |
833 | /// given haystack. |
834 | /// |
835 | /// The iterator returned implements `DoubleEndedIterator`. This means it |
836 | /// can also be used to find occurrences in reverse order. |
837 | #[inline ] |
838 | pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { |
839 | TwoIter { searcher: self, it: generic::Iter::new(haystack) } |
840 | } |
841 | } |
842 | |
843 | /// An iterator over all occurrences of two possible bytes in a haystack. |
844 | /// |
845 | /// This iterator implements `DoubleEndedIterator`, which means it can also be |
846 | /// used to find occurrences in reverse order. |
847 | /// |
848 | /// This iterator is created by the [`Two::iter`] method. |
849 | /// |
850 | /// The lifetime parameters are as follows: |
851 | /// |
852 | /// * `'a` refers to the lifetime of the underlying [`Two`] searcher. |
853 | /// * `'h` refers to the lifetime of the haystack being searched. |
854 | #[derive(Clone, Debug)] |
855 | pub struct TwoIter<'a, 'h> { |
856 | searcher: &'a Two, |
857 | it: generic::Iter<'h>, |
858 | } |
859 | |
860 | impl<'a, 'h> Iterator for TwoIter<'a, 'h> { |
861 | type Item = usize; |
862 | |
863 | #[inline ] |
864 | fn next(&mut self) -> Option<usize> { |
865 | // SAFETY: We rely on the generic iterator to provide valid start |
866 | // and end pointers, but we guarantee that any pointer returned by |
867 | // 'find_raw' falls within the bounds of the start and end pointer. |
868 | unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } |
869 | } |
870 | |
871 | #[inline ] |
872 | fn size_hint(&self) -> (usize, Option<usize>) { |
873 | self.it.size_hint() |
874 | } |
875 | } |
876 | |
877 | impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { |
878 | #[inline ] |
879 | fn next_back(&mut self) -> Option<usize> { |
880 | // SAFETY: We rely on the generic iterator to provide valid start |
881 | // and end pointers, but we guarantee that any pointer returned by |
882 | // 'rfind_raw' falls within the bounds of the start and end pointer. |
883 | unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } |
884 | } |
885 | } |
886 | |
887 | impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} |
888 | |
889 | /// Finds all occurrences of three bytes in a haystack. |
890 | /// |
891 | /// That is, this reports matches of one of three possible bytes. For example, |
892 | /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets |
893 | /// `0`, `2`, `3`, `4` and `5`. |
894 | #[derive(Clone, Copy, Debug)] |
895 | pub struct Three { |
896 | /// Used for haystacks less than 32 bytes. |
897 | sse2: generic::Three<__m128i>, |
898 | /// Used for haystacks bigger than 32 bytes. |
899 | avx2: generic::Three<__m256i>, |
900 | } |
901 | |
902 | impl Three { |
903 | /// Create a new searcher that finds occurrences of the needle bytes given. |
904 | /// |
905 | /// This particular searcher is specialized to use AVX2 vector instructions |
906 | /// that typically make it quite fast. (SSE2 is used for haystacks that |
907 | /// are too short to accommodate an AVX2 vector.) |
908 | /// |
909 | /// If either SSE2 or AVX2 is unavailable in the current environment, then |
910 | /// `None` is returned. |
911 | #[inline ] |
912 | pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { |
913 | if Three::is_available() { |
914 | // SAFETY: we check that sse2 and avx2 are available above. |
915 | unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } |
916 | } else { |
917 | None |
918 | } |
919 | } |
920 | |
921 | /// Create a new finder specific to AVX2 vectors and routines without |
922 | /// checking that either SSE2 or AVX2 is available. |
923 | /// |
924 | /// # Safety |
925 | /// |
926 | /// Callers must guarantee that it is safe to execute both `sse2` and |
927 | /// `avx2` instructions in the current environment. |
928 | /// |
929 | /// Note that it is a common misconception that if one compiles for an |
930 | /// `x86_64` target, then they therefore automatically have access to SSE2 |
931 | /// instructions. While this is almost always the case, it isn't true in |
932 | /// 100% of cases. |
933 | #[target_feature (enable = "sse2" , enable = "avx2" )] |
934 | #[inline ] |
935 | pub unsafe fn new_unchecked( |
936 | needle1: u8, |
937 | needle2: u8, |
938 | needle3: u8, |
939 | ) -> Three { |
940 | Three { |
941 | sse2: generic::Three::new(needle1, needle2, needle3), |
942 | avx2: generic::Three::new(needle1, needle2, needle3), |
943 | } |
944 | } |
945 | |
946 | /// Returns true when this implementation is available in the current |
947 | /// environment. |
948 | /// |
949 | /// When this is true, it is guaranteed that [`Three::new`] will return |
950 | /// a `Some` value. Similarly, when it is false, it is guaranteed that |
951 | /// `Three::new` will return a `None` value. |
952 | /// |
953 | /// Note also that for the lifetime of a single program, if this returns |
954 | /// true then it will always return true. |
955 | #[inline ] |
956 | pub fn is_available() -> bool { |
957 | #[cfg (not(target_feature = "sse2" ))] |
958 | { |
959 | false |
960 | } |
961 | #[cfg (target_feature = "sse2" )] |
962 | { |
963 | #[cfg (target_feature = "avx2" )] |
964 | { |
965 | true |
966 | } |
967 | #[cfg (not(target_feature = "avx2" ))] |
968 | { |
969 | #[cfg (feature = "std" )] |
970 | { |
971 | std::is_x86_feature_detected!("avx2" ) |
972 | } |
973 | #[cfg (not(feature = "std" ))] |
974 | { |
975 | false |
976 | } |
977 | } |
978 | } |
979 | } |
980 | |
981 | /// Return the first occurrence of one of the needle bytes in the given |
982 | /// haystack. If no such occurrence exists, then `None` is returned. |
983 | /// |
984 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
985 | /// value is `haystack.len() - 1`. |
986 | #[inline ] |
987 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
988 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
989 | // falls within the bounds of the start and end pointers. |
990 | unsafe { |
991 | generic::search_slice_with_raw(haystack, |s, e| { |
992 | self.find_raw(s, e) |
993 | }) |
994 | } |
995 | } |
996 | |
997 | /// Return the last occurrence of one of the needle bytes in the given |
998 | /// haystack. If no such occurrence exists, then `None` is returned. |
999 | /// |
1000 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
1001 | /// value is `haystack.len() - 1`. |
1002 | #[inline ] |
1003 | pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { |
1004 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
1005 | // falls within the bounds of the start and end pointers. |
1006 | unsafe { |
1007 | generic::search_slice_with_raw(haystack, |s, e| { |
1008 | self.rfind_raw(s, e) |
1009 | }) |
1010 | } |
1011 | } |
1012 | |
1013 | /// Like `find`, but accepts and returns raw pointers. |
1014 | /// |
1015 | /// When a match is found, the pointer returned is guaranteed to be |
1016 | /// `>= start` and `< end`. |
1017 | /// |
1018 | /// This routine is useful if you're already using raw pointers and would |
1019 | /// like to avoid converting back to a slice before executing a search. |
1020 | /// |
1021 | /// # Safety |
1022 | /// |
1023 | /// * Both `start` and `end` must be valid for reads. |
1024 | /// * Both `start` and `end` must point to an initialized value. |
1025 | /// * Both `start` and `end` must point to the same allocated object and |
1026 | /// must either be in bounds or at most one byte past the end of the |
1027 | /// allocated object. |
1028 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
1029 | /// object. |
1030 | /// * The distance between `start` and `end` must not overflow `isize`. |
1031 | /// * The distance being in bounds must not rely on "wrapping around" the |
1032 | /// address space. |
1033 | /// |
1034 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
1035 | /// In that case, `None` will always be returned. |
1036 | #[inline ] |
1037 | pub unsafe fn find_raw( |
1038 | &self, |
1039 | start: *const u8, |
1040 | end: *const u8, |
1041 | ) -> Option<*const u8> { |
1042 | if start >= end { |
1043 | return None; |
1044 | } |
1045 | let len = end.distance(start); |
1046 | if len < __m256i::BYTES { |
1047 | return if len < __m128i::BYTES { |
1048 | // SAFETY: We require the caller to pass valid start/end |
1049 | // pointers. |
1050 | generic::fwd_byte_by_byte(start, end, |b| { |
1051 | b == self.sse2.needle1() |
1052 | || b == self.sse2.needle2() |
1053 | || b == self.sse2.needle3() |
1054 | }) |
1055 | } else { |
1056 | // SAFETY: We require the caller to pass valid start/end |
1057 | // pointers. |
1058 | self.find_raw_sse2(start, end) |
1059 | }; |
1060 | } |
1061 | // SAFETY: Building a `Three` means it's safe to call both 'sse2' and |
1062 | // 'avx2' routines. Also, we've checked that our haystack is big |
1063 | // enough to run on the vector routine. Pointer validity is caller's |
1064 | // responsibility. |
1065 | // |
1066 | // Note that we could call `self.avx2.find_raw` directly here. But that |
1067 | // means we'd have to annotate this routine with `target_feature`. |
1068 | // Which is fine, because this routine is `unsafe` anyway and the |
1069 | // `target_feature` obligation is met by virtue of building a `Three`. |
1070 | // The real problem is that a routine with a `target_feature` |
1071 | // annotation generally can't be inlined into caller code unless |
1072 | // the caller code has the same target feature annotations. Namely, |
1073 | // the common case (at time of writing) is for calling code to not |
1074 | // have the `avx2` target feature enabled *at compile time*. Without |
1075 | // `target_feature` on this routine, it can be inlined which will |
1076 | // handle some of the short-haystack cases above without touching the |
1077 | // architecture specific code. |
1078 | self.find_raw_avx2(start, end) |
1079 | } |
1080 | |
1081 | /// Like `rfind`, but accepts and returns raw pointers. |
1082 | /// |
1083 | /// When a match is found, the pointer returned is guaranteed to be |
1084 | /// `>= start` and `< end`. |
1085 | /// |
1086 | /// This routine is useful if you're already using raw pointers and would |
1087 | /// like to avoid converting back to a slice before executing a search. |
1088 | /// |
1089 | /// # Safety |
1090 | /// |
1091 | /// * Both `start` and `end` must be valid for reads. |
1092 | /// * Both `start` and `end` must point to an initialized value. |
1093 | /// * Both `start` and `end` must point to the same allocated object and |
1094 | /// must either be in bounds or at most one byte past the end of the |
1095 | /// allocated object. |
1096 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
1097 | /// object. |
1098 | /// * The distance between `start` and `end` must not overflow `isize`. |
1099 | /// * The distance being in bounds must not rely on "wrapping around" the |
1100 | /// address space. |
1101 | /// |
1102 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
1103 | /// In that case, `None` will always be returned. |
1104 | #[inline ] |
1105 | pub unsafe fn rfind_raw( |
1106 | &self, |
1107 | start: *const u8, |
1108 | end: *const u8, |
1109 | ) -> Option<*const u8> { |
1110 | if start >= end { |
1111 | return None; |
1112 | } |
1113 | let len = end.distance(start); |
1114 | if len < __m256i::BYTES { |
1115 | return if len < __m128i::BYTES { |
1116 | // SAFETY: We require the caller to pass valid start/end |
1117 | // pointers. |
1118 | generic::rev_byte_by_byte(start, end, |b| { |
1119 | b == self.sse2.needle1() |
1120 | || b == self.sse2.needle2() |
1121 | || b == self.sse2.needle3() |
1122 | }) |
1123 | } else { |
1124 | // SAFETY: We require the caller to pass valid start/end |
1125 | // pointers. |
1126 | self.rfind_raw_sse2(start, end) |
1127 | }; |
1128 | } |
1129 | // SAFETY: Building a `Three` means it's safe to call both 'sse2' and |
1130 | // 'avx2' routines. Also, we've checked that our haystack is big |
1131 | // enough to run on the vector routine. Pointer validity is caller's |
1132 | // responsibility. |
1133 | // |
1134 | // See note in forward routine above for why we don't just call |
1135 | // `self.avx2.rfind_raw` directly here. |
1136 | self.rfind_raw_avx2(start, end) |
1137 | } |
1138 | |
1139 | /// Execute a search using SSE2 vectors and routines. |
1140 | /// |
1141 | /// # Safety |
1142 | /// |
1143 | /// Same as [`Three::find_raw`], except the distance between `start` and |
1144 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
1145 | /// |
1146 | /// (The target feature safety obligation is automatically fulfilled by |
1147 | /// virtue of being a method on `Three`, which can only be constructed |
1148 | /// when it is safe to call `sse2`/`avx2` routines.) |
1149 | #[target_feature (enable = "sse2" )] |
1150 | #[inline ] |
1151 | unsafe fn find_raw_sse2( |
1152 | &self, |
1153 | start: *const u8, |
1154 | end: *const u8, |
1155 | ) -> Option<*const u8> { |
1156 | self.sse2.find_raw(start, end) |
1157 | } |
1158 | |
1159 | /// Execute a search using SSE2 vectors and routines. |
1160 | /// |
1161 | /// # Safety |
1162 | /// |
1163 | /// Same as [`Three::rfind_raw`], except the distance between `start` and |
1164 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
1165 | /// |
1166 | /// (The target feature safety obligation is automatically fulfilled by |
1167 | /// virtue of being a method on `Three`, which can only be constructed |
1168 | /// when it is safe to call `sse2`/`avx2` routines.) |
1169 | #[target_feature (enable = "sse2" )] |
1170 | #[inline ] |
1171 | unsafe fn rfind_raw_sse2( |
1172 | &self, |
1173 | start: *const u8, |
1174 | end: *const u8, |
1175 | ) -> Option<*const u8> { |
1176 | self.sse2.rfind_raw(start, end) |
1177 | } |
1178 | |
1179 | /// Execute a search using AVX2 vectors and routines. |
1180 | /// |
1181 | /// # Safety |
1182 | /// |
1183 | /// Same as [`Three::find_raw`], except the distance between `start` and |
1184 | /// `end` must be at least the size of an AVX2 vector (in bytes). |
1185 | /// |
1186 | /// (The target feature safety obligation is automatically fulfilled by |
1187 | /// virtue of being a method on `Three`, which can only be constructed |
1188 | /// when it is safe to call `sse2`/`avx2` routines.) |
1189 | #[target_feature (enable = "avx2" )] |
1190 | #[inline ] |
1191 | unsafe fn find_raw_avx2( |
1192 | &self, |
1193 | start: *const u8, |
1194 | end: *const u8, |
1195 | ) -> Option<*const u8> { |
1196 | self.avx2.find_raw(start, end) |
1197 | } |
1198 | |
1199 | /// Execute a search using AVX2 vectors and routines. |
1200 | /// |
1201 | /// # Safety |
1202 | /// |
1203 | /// Same as [`Three::rfind_raw`], except the distance between `start` and |
1204 | /// `end` must be at least the size of an AVX2 vector (in bytes). |
1205 | /// |
1206 | /// (The target feature safety obligation is automatically fulfilled by |
1207 | /// virtue of being a method on `Three`, which can only be constructed |
1208 | /// when it is safe to call `sse2`/`avx2` routines.) |
1209 | #[target_feature (enable = "avx2" )] |
1210 | #[inline ] |
1211 | unsafe fn rfind_raw_avx2( |
1212 | &self, |
1213 | start: *const u8, |
1214 | end: *const u8, |
1215 | ) -> Option<*const u8> { |
1216 | self.avx2.rfind_raw(start, end) |
1217 | } |
1218 | |
1219 | /// Returns an iterator over all occurrences of the needle bytes in the |
1220 | /// given haystack. |
1221 | /// |
1222 | /// The iterator returned implements `DoubleEndedIterator`. This means it |
1223 | /// can also be used to find occurrences in reverse order. |
1224 | #[inline ] |
1225 | pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { |
1226 | ThreeIter { searcher: self, it: generic::Iter::new(haystack) } |
1227 | } |
1228 | } |
1229 | |
1230 | /// An iterator over all occurrences of three possible bytes in a haystack. |
1231 | /// |
1232 | /// This iterator implements `DoubleEndedIterator`, which means it can also be |
1233 | /// used to find occurrences in reverse order. |
1234 | /// |
1235 | /// This iterator is created by the [`Three::iter`] method. |
1236 | /// |
1237 | /// The lifetime parameters are as follows: |
1238 | /// |
1239 | /// * `'a` refers to the lifetime of the underlying [`Three`] searcher. |
1240 | /// * `'h` refers to the lifetime of the haystack being searched. |
1241 | #[derive(Clone, Debug)] |
1242 | pub struct ThreeIter<'a, 'h> { |
1243 | searcher: &'a Three, |
1244 | it: generic::Iter<'h>, |
1245 | } |
1246 | |
1247 | impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { |
1248 | type Item = usize; |
1249 | |
1250 | #[inline ] |
1251 | fn next(&mut self) -> Option<usize> { |
1252 | // SAFETY: We rely on the generic iterator to provide valid start |
1253 | // and end pointers, but we guarantee that any pointer returned by |
1254 | // 'find_raw' falls within the bounds of the start and end pointer. |
1255 | unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } |
1256 | } |
1257 | |
1258 | #[inline ] |
1259 | fn size_hint(&self) -> (usize, Option<usize>) { |
1260 | self.it.size_hint() |
1261 | } |
1262 | } |
1263 | |
1264 | impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { |
1265 | #[inline ] |
1266 | fn next_back(&mut self) -> Option<usize> { |
1267 | // SAFETY: We rely on the generic iterator to provide valid start |
1268 | // and end pointers, but we guarantee that any pointer returned by |
1269 | // 'rfind_raw' falls within the bounds of the start and end pointer. |
1270 | unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } |
1271 | } |
1272 | } |
1273 | |
1274 | impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} |
1275 | |
1276 | #[cfg (test)] |
1277 | mod tests { |
1278 | use super::*; |
1279 | |
1280 | define_memchr_quickcheck!(super); |
1281 | |
1282 | #[test] |
1283 | fn forward_one() { |
1284 | crate::tests::memchr::Runner::new(1).forward_iter( |
1285 | |haystack, needles| { |
1286 | Some(One::new(needles[0])?.iter(haystack).collect()) |
1287 | }, |
1288 | ) |
1289 | } |
1290 | |
1291 | #[test] |
1292 | fn reverse_one() { |
1293 | crate::tests::memchr::Runner::new(1).reverse_iter( |
1294 | |haystack, needles| { |
1295 | Some(One::new(needles[0])?.iter(haystack).rev().collect()) |
1296 | }, |
1297 | ) |
1298 | } |
1299 | |
1300 | #[test] |
1301 | fn count_one() { |
1302 | crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { |
1303 | Some(One::new(needles[0])?.iter(haystack).count()) |
1304 | }) |
1305 | } |
1306 | |
1307 | #[test] |
1308 | fn forward_two() { |
1309 | crate::tests::memchr::Runner::new(2).forward_iter( |
1310 | |haystack, needles| { |
1311 | let n1 = needles.get(0).copied()?; |
1312 | let n2 = needles.get(1).copied()?; |
1313 | Some(Two::new(n1, n2)?.iter(haystack).collect()) |
1314 | }, |
1315 | ) |
1316 | } |
1317 | |
1318 | #[test] |
1319 | fn reverse_two() { |
1320 | crate::tests::memchr::Runner::new(2).reverse_iter( |
1321 | |haystack, needles| { |
1322 | let n1 = needles.get(0).copied()?; |
1323 | let n2 = needles.get(1).copied()?; |
1324 | Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) |
1325 | }, |
1326 | ) |
1327 | } |
1328 | |
1329 | #[test] |
1330 | fn forward_three() { |
1331 | crate::tests::memchr::Runner::new(3).forward_iter( |
1332 | |haystack, needles| { |
1333 | let n1 = needles.get(0).copied()?; |
1334 | let n2 = needles.get(1).copied()?; |
1335 | let n3 = needles.get(2).copied()?; |
1336 | Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) |
1337 | }, |
1338 | ) |
1339 | } |
1340 | |
1341 | #[test] |
1342 | fn reverse_three() { |
1343 | crate::tests::memchr::Runner::new(3).reverse_iter( |
1344 | |haystack, needles| { |
1345 | let n1 = needles.get(0).copied()?; |
1346 | let n2 = needles.get(1).copied()?; |
1347 | let n3 = needles.get(2).copied()?; |
1348 | Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) |
1349 | }, |
1350 | ) |
1351 | } |
1352 | } |
1353 | |