1/*!
2This module defines 128-bit vector implementations of `memchr` and friends.
3
4The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
5searching for one, two or three distinct bytes, respectively, in a haystack.
6Each type also has corresponding double ended iterators. These searchers are
7typically much faster than scalar routines accomplishing the same task.
8
9The `One` searcher also provides a [`One::count`] routine for efficiently
10counting the number of times a single byte occurs in a haystack. This is
11useful, for example, for counting the number of lines in a haystack. This
12routine exists because it is usually faster, especially with a high match
13count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its
14`Iterator::count` implementation to use this routine.)
15
16Only one, two and three bytes are supported because three bytes is about
17the point where one sees diminishing returns. Beyond this point and it's
18probably (but not necessarily) better to just use a simple `[bool; 256]` array
19or similar. However, it depends mightily on the specific work-load and the
20expected match frequency.
21*/
22
23use core::arch::x86_64::__m128i;
24
25use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
26
27/// Finds all occurrences of a single byte in a haystack.
28#[derive(Clone, Copy, Debug)]
29pub struct One(generic::One<__m128i>);
30
31impl One {
32 /// Create a new searcher that finds occurrences of the needle byte given.
33 ///
34 /// This particular searcher is specialized to use SSE2 vector instructions
35 /// that typically make it quite fast.
36 ///
37 /// If SSE2 is unavailable in the current environment, then `None` is
38 /// returned.
39 #[inline]
40 pub fn new(needle: u8) -> Option<One> {
41 if One::is_available() {
42 // SAFETY: we check that sse2 is available above.
43 unsafe { Some(One::new_unchecked(needle)) }
44 } else {
45 None
46 }
47 }
48
49 /// Create a new finder specific to SSE2 vectors and routines without
50 /// checking that SSE2 is available.
51 ///
52 /// # Safety
53 ///
54 /// Callers must guarantee that it is safe to execute `sse2` instructions
55 /// in the current environment.
56 ///
57 /// Note that it is a common misconception that if one compiles for an
58 /// `x86_64` target, then they therefore automatically have access to SSE2
59 /// instructions. While this is almost always the case, it isn't true in
60 /// 100% of cases.
61 #[target_feature(enable = "sse2")]
62 #[inline]
63 pub unsafe fn new_unchecked(needle: u8) -> One {
64 One(generic::One::new(needle))
65 }
66
67 /// Returns true when this implementation is available in the current
68 /// environment.
69 ///
70 /// When this is true, it is guaranteed that [`One::new`] will return
71 /// a `Some` value. Similarly, when it is false, it is guaranteed that
72 /// `One::new` will return a `None` value.
73 ///
74 /// Note also that for the lifetime of a single program, if this returns
75 /// true then it will always return true.
76 #[inline]
77 pub fn is_available() -> bool {
78 #[cfg(target_feature = "sse2")]
79 {
80 true
81 }
82 #[cfg(not(target_feature = "sse2"))]
83 {
84 false
85 }
86 }
87
88 /// Return the first occurrence of one of the needle bytes in the given
89 /// haystack. If no such occurrence exists, then `None` is returned.
90 ///
91 /// The occurrence is reported as an offset into `haystack`. Its maximum
92 /// value is `haystack.len() - 1`.
93 #[inline]
94 pub fn find(&self, haystack: &[u8]) -> Option<usize> {
95 // SAFETY: `find_raw` guarantees that if a pointer is returned, it
96 // falls within the bounds of the start and end pointers.
97 unsafe {
98 generic::search_slice_with_raw(haystack, |s, e| {
99 self.find_raw(s, e)
100 })
101 }
102 }
103
104 /// Return the last occurrence of one of the needle bytes in the given
105 /// haystack. If no such occurrence exists, then `None` is returned.
106 ///
107 /// The occurrence is reported as an offset into `haystack`. Its maximum
108 /// value is `haystack.len() - 1`.
109 #[inline]
110 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
111 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
112 // falls within the bounds of the start and end pointers.
113 unsafe {
114 generic::search_slice_with_raw(haystack, |s, e| {
115 self.rfind_raw(s, e)
116 })
117 }
118 }
119
120 /// Counts all occurrences of this byte in the given haystack.
121 #[inline]
122 pub fn count(&self, haystack: &[u8]) -> usize {
123 // SAFETY: All of our pointers are derived directly from a borrowed
124 // slice, which is guaranteed to be valid.
125 unsafe {
126 let start = haystack.as_ptr();
127 let end = start.add(haystack.len());
128 self.count_raw(start, end)
129 }
130 }
131
132 /// Like `find`, but accepts and returns raw pointers.
133 ///
134 /// When a match is found, the pointer returned is guaranteed to be
135 /// `>= start` and `< end`.
136 ///
137 /// This routine is useful if you're already using raw pointers and would
138 /// like to avoid converting back to a slice before executing a search.
139 ///
140 /// # Safety
141 ///
142 /// * Both `start` and `end` must be valid for reads.
143 /// * Both `start` and `end` must point to an initialized value.
144 /// * Both `start` and `end` must point to the same allocated object and
145 /// must either be in bounds or at most one byte past the end of the
146 /// allocated object.
147 /// * Both `start` and `end` must be _derived from_ a pointer to the same
148 /// object.
149 /// * The distance between `start` and `end` must not overflow `isize`.
150 /// * The distance being in bounds must not rely on "wrapping around" the
151 /// address space.
152 ///
153 /// Note that callers may pass a pair of pointers such that `start >= end`.
154 /// In that case, `None` will always be returned.
155 #[inline]
156 pub unsafe fn find_raw(
157 &self,
158 start: *const u8,
159 end: *const u8,
160 ) -> Option<*const u8> {
161 if start >= end {
162 return None;
163 }
164 if end.distance(start) < __m128i::BYTES {
165 // SAFETY: We require the caller to pass valid start/end pointers.
166 return generic::fwd_byte_by_byte(start, end, |b| {
167 b == self.0.needle1()
168 });
169 }
170 // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
171 // Also, we've checked that our haystack is big enough to run on the
172 // vector routine. Pointer validity is caller's responsibility.
173 //
174 // Note that we could call `self.0.find_raw` directly here. But that
175 // means we'd have to annotate this routine with `target_feature`.
176 // Which is fine, because this routine is `unsafe` anyway and the
177 // `target_feature` obligation is met by virtue of building a `One`.
178 // The real problem is that a routine with a `target_feature`
179 // annotation generally can't be inlined into caller code unless the
180 // caller code has the same target feature annotations. Which is maybe
181 // okay for SSE2, but we do the same thing for AVX2 where caller code
182 // probably usually doesn't have AVX2 enabled. That means that this
183 // routine can be inlined which will handle some of the short-haystack
184 // cases above without touching the architecture specific code.
185 self.find_raw_impl(start, end)
186 }
187
188 /// Like `rfind`, but accepts and returns raw pointers.
189 ///
190 /// When a match is found, the pointer returned is guaranteed to be
191 /// `>= start` and `< end`.
192 ///
193 /// This routine is useful if you're already using raw pointers and would
194 /// like to avoid converting back to a slice before executing a search.
195 ///
196 /// # Safety
197 ///
198 /// * Both `start` and `end` must be valid for reads.
199 /// * Both `start` and `end` must point to an initialized value.
200 /// * Both `start` and `end` must point to the same allocated object and
201 /// must either be in bounds or at most one byte past the end of the
202 /// allocated object.
203 /// * Both `start` and `end` must be _derived from_ a pointer to the same
204 /// object.
205 /// * The distance between `start` and `end` must not overflow `isize`.
206 /// * The distance being in bounds must not rely on "wrapping around" the
207 /// address space.
208 ///
209 /// Note that callers may pass a pair of pointers such that `start >= end`.
210 /// In that case, `None` will always be returned.
211 #[inline]
212 pub unsafe fn rfind_raw(
213 &self,
214 start: *const u8,
215 end: *const u8,
216 ) -> Option<*const u8> {
217 if start >= end {
218 return None;
219 }
220 if end.distance(start) < __m128i::BYTES {
221 // SAFETY: We require the caller to pass valid start/end pointers.
222 return generic::rev_byte_by_byte(start, end, |b| {
223 b == self.0.needle1()
224 });
225 }
226 // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
227 // Also, we've checked that our haystack is big enough to run on the
228 // vector routine. Pointer validity is caller's responsibility.
229 //
230 // See note in forward routine above for why we don't just call
231 // `self.0.rfind_raw` directly here.
232 self.rfind_raw_impl(start, end)
233 }
234
235 /// Counts all occurrences of this byte in the given haystack represented
236 /// by raw pointers.
237 ///
238 /// This routine is useful if you're already using raw pointers and would
239 /// like to avoid converting back to a slice before executing a search.
240 ///
241 /// # Safety
242 ///
243 /// * Both `start` and `end` must be valid for reads.
244 /// * Both `start` and `end` must point to an initialized value.
245 /// * Both `start` and `end` must point to the same allocated object and
246 /// must either be in bounds or at most one byte past the end of the
247 /// allocated object.
248 /// * Both `start` and `end` must be _derived from_ a pointer to the same
249 /// object.
250 /// * The distance between `start` and `end` must not overflow `isize`.
251 /// * The distance being in bounds must not rely on "wrapping around" the
252 /// address space.
253 ///
254 /// Note that callers may pass a pair of pointers such that `start >= end`.
255 /// In that case, `0` will always be returned.
256 #[inline]
257 pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
258 if start >= end {
259 return 0;
260 }
261 if end.distance(start) < __m128i::BYTES {
262 // SAFETY: We require the caller to pass valid start/end pointers.
263 return generic::count_byte_by_byte(start, end, |b| {
264 b == self.0.needle1()
265 });
266 }
267 // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
268 // Also, we've checked that our haystack is big enough to run on the
269 // vector routine. Pointer validity is caller's responsibility.
270 self.count_raw_impl(start, end)
271 }
272
273 /// Execute a search using SSE2 vectors and routines.
274 ///
275 /// # Safety
276 ///
277 /// Same as [`One::find_raw`], except the distance between `start` and
278 /// `end` must be at least the size of an SSE2 vector (in bytes).
279 ///
280 /// (The target feature safety obligation is automatically fulfilled by
281 /// virtue of being a method on `One`, which can only be constructed
282 /// when it is safe to call `sse2` routines.)
283 #[target_feature(enable = "sse2")]
284 #[inline]
285 unsafe fn find_raw_impl(
286 &self,
287 start: *const u8,
288 end: *const u8,
289 ) -> Option<*const u8> {
290 self.0.find_raw(start, end)
291 }
292
293 /// Execute a search using SSE2 vectors and routines.
294 ///
295 /// # Safety
296 ///
297 /// Same as [`One::rfind_raw`], except the distance between `start` and
298 /// `end` must be at least the size of an SSE2 vector (in bytes).
299 ///
300 /// (The target feature safety obligation is automatically fulfilled by
301 /// virtue of being a method on `One`, which can only be constructed
302 /// when it is safe to call `sse2` routines.)
303 #[target_feature(enable = "sse2")]
304 #[inline]
305 unsafe fn rfind_raw_impl(
306 &self,
307 start: *const u8,
308 end: *const u8,
309 ) -> Option<*const u8> {
310 self.0.rfind_raw(start, end)
311 }
312
313 /// Execute a count using SSE2 vectors and routines.
314 ///
315 /// # Safety
316 ///
317 /// Same as [`One::count_raw`], except the distance between `start` and
318 /// `end` must be at least the size of an SSE2 vector (in bytes).
319 ///
320 /// (The target feature safety obligation is automatically fulfilled by
321 /// virtue of being a method on `One`, which can only be constructed
322 /// when it is safe to call `sse2` routines.)
323 #[target_feature(enable = "sse2")]
324 #[inline]
325 unsafe fn count_raw_impl(
326 &self,
327 start: *const u8,
328 end: *const u8,
329 ) -> usize {
330 self.0.count_raw(start, end)
331 }
332
333 /// Returns an iterator over all occurrences of the needle byte in the
334 /// given haystack.
335 ///
336 /// The iterator returned implements `DoubleEndedIterator`. This means it
337 /// can also be used to find occurrences in reverse order.
338 #[inline]
339 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
340 OneIter { searcher: self, it: generic::Iter::new(haystack) }
341 }
342}
343
344/// An iterator over all occurrences of a single byte in a haystack.
345///
346/// This iterator implements `DoubleEndedIterator`, which means it can also be
347/// used to find occurrences in reverse order.
348///
349/// This iterator is created by the [`One::iter`] method.
350///
351/// The lifetime parameters are as follows:
352///
353/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
354/// * `'h` refers to the lifetime of the haystack being searched.
355#[derive(Clone, Debug)]
356pub struct OneIter<'a, 'h> {
357 searcher: &'a One,
358 it: generic::Iter<'h>,
359}
360
361impl<'a, 'h> Iterator for OneIter<'a, 'h> {
362 type Item = usize;
363
364 #[inline]
365 fn next(&mut self) -> Option<usize> {
366 // SAFETY: We rely on the generic iterator to provide valid start
367 // and end pointers, but we guarantee that any pointer returned by
368 // 'find_raw' falls within the bounds of the start and end pointer.
369 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
370 }
371
372 #[inline]
373 fn count(self) -> usize {
374 self.it.count(|s, e| {
375 // SAFETY: We rely on our generic iterator to return valid start
376 // and end pointers.
377 unsafe { self.searcher.count_raw(s, e) }
378 })
379 }
380
381 #[inline]
382 fn size_hint(&self) -> (usize, Option<usize>) {
383 self.it.size_hint()
384 }
385}
386
387impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
388 #[inline]
389 fn next_back(&mut self) -> Option<usize> {
390 // SAFETY: We rely on the generic iterator to provide valid start
391 // and end pointers, but we guarantee that any pointer returned by
392 // 'rfind_raw' falls within the bounds of the start and end pointer.
393 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
394 }
395}
396
397impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
398
399/// Finds all occurrences of two bytes in a haystack.
400///
401/// That is, this reports matches of one of two possible bytes. For example,
402/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
403/// `4` and `5`.
404#[derive(Clone, Copy, Debug)]
405pub struct Two(generic::Two<__m128i>);
406
407impl Two {
408 /// Create a new searcher that finds occurrences of the needle bytes given.
409 ///
410 /// This particular searcher is specialized to use SSE2 vector instructions
411 /// that typically make it quite fast.
412 ///
413 /// If SSE2 is unavailable in the current environment, then `None` is
414 /// returned.
415 #[inline]
416 pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
417 if Two::is_available() {
418 // SAFETY: we check that sse2 is available above.
419 unsafe { Some(Two::new_unchecked(needle1, needle2)) }
420 } else {
421 None
422 }
423 }
424
425 /// Create a new finder specific to SSE2 vectors and routines without
426 /// checking that SSE2 is available.
427 ///
428 /// # Safety
429 ///
430 /// Callers must guarantee that it is safe to execute `sse2` instructions
431 /// in the current environment.
432 ///
433 /// Note that it is a common misconception that if one compiles for an
434 /// `x86_64` target, then they therefore automatically have access to SSE2
435 /// instructions. While this is almost always the case, it isn't true in
436 /// 100% of cases.
437 #[target_feature(enable = "sse2")]
438 #[inline]
439 pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
440 Two(generic::Two::new(needle1, needle2))
441 }
442
443 /// Returns true when this implementation is available in the current
444 /// environment.
445 ///
446 /// When this is true, it is guaranteed that [`Two::new`] will return
447 /// a `Some` value. Similarly, when it is false, it is guaranteed that
448 /// `Two::new` will return a `None` value.
449 ///
450 /// Note also that for the lifetime of a single program, if this returns
451 /// true then it will always return true.
452 #[inline]
453 pub fn is_available() -> bool {
454 #[cfg(target_feature = "sse2")]
455 {
456 true
457 }
458 #[cfg(not(target_feature = "sse2"))]
459 {
460 false
461 }
462 }
463
464 /// Return the first occurrence of one of the needle bytes in the given
465 /// haystack. If no such occurrence exists, then `None` is returned.
466 ///
467 /// The occurrence is reported as an offset into `haystack`. Its maximum
468 /// value is `haystack.len() - 1`.
469 #[inline]
470 pub fn find(&self, haystack: &[u8]) -> Option<usize> {
471 // SAFETY: `find_raw` guarantees that if a pointer is returned, it
472 // falls within the bounds of the start and end pointers.
473 unsafe {
474 generic::search_slice_with_raw(haystack, |s, e| {
475 self.find_raw(s, e)
476 })
477 }
478 }
479
480 /// Return the last occurrence of one of the needle bytes in the given
481 /// haystack. If no such occurrence exists, then `None` is returned.
482 ///
483 /// The occurrence is reported as an offset into `haystack`. Its maximum
484 /// value is `haystack.len() - 1`.
485 #[inline]
486 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
487 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
488 // falls within the bounds of the start and end pointers.
489 unsafe {
490 generic::search_slice_with_raw(haystack, |s, e| {
491 self.rfind_raw(s, e)
492 })
493 }
494 }
495
496 /// Like `find`, but accepts and returns raw pointers.
497 ///
498 /// When a match is found, the pointer returned is guaranteed to be
499 /// `>= start` and `< end`.
500 ///
501 /// This routine is useful if you're already using raw pointers and would
502 /// like to avoid converting back to a slice before executing a search.
503 ///
504 /// # Safety
505 ///
506 /// * Both `start` and `end` must be valid for reads.
507 /// * Both `start` and `end` must point to an initialized value.
508 /// * Both `start` and `end` must point to the same allocated object and
509 /// must either be in bounds or at most one byte past the end of the
510 /// allocated object.
511 /// * Both `start` and `end` must be _derived from_ a pointer to the same
512 /// object.
513 /// * The distance between `start` and `end` must not overflow `isize`.
514 /// * The distance being in bounds must not rely on "wrapping around" the
515 /// address space.
516 ///
517 /// Note that callers may pass a pair of pointers such that `start >= end`.
518 /// In that case, `None` will always be returned.
519 #[inline]
520 pub unsafe fn find_raw(
521 &self,
522 start: *const u8,
523 end: *const u8,
524 ) -> Option<*const u8> {
525 if start >= end {
526 return None;
527 }
528 if end.distance(start) < __m128i::BYTES {
529 // SAFETY: We require the caller to pass valid start/end pointers.
530 return generic::fwd_byte_by_byte(start, end, |b| {
531 b == self.0.needle1() || b == self.0.needle2()
532 });
533 }
534 // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
535 // Also, we've checked that our haystack is big enough to run on the
536 // vector routine. Pointer validity is caller's responsibility.
537 //
538 // Note that we could call `self.0.find_raw` directly here. But that
539 // means we'd have to annotate this routine with `target_feature`.
540 // Which is fine, because this routine is `unsafe` anyway and the
541 // `target_feature` obligation is met by virtue of building a `Two`.
542 // The real problem is that a routine with a `target_feature`
543 // annotation generally can't be inlined into caller code unless the
544 // caller code has the same target feature annotations. Which is maybe
545 // okay for SSE2, but we do the same thing for AVX2 where caller code
546 // probably usually doesn't have AVX2 enabled. That means that this
547 // routine can be inlined which will handle some of the short-haystack
548 // cases above without touching the architecture specific code.
549 self.find_raw_impl(start, end)
550 }
551
552 /// Like `rfind`, but accepts and returns raw pointers.
553 ///
554 /// When a match is found, the pointer returned is guaranteed to be
555 /// `>= start` and `< end`.
556 ///
557 /// This routine is useful if you're already using raw pointers and would
558 /// like to avoid converting back to a slice before executing a search.
559 ///
560 /// # Safety
561 ///
562 /// * Both `start` and `end` must be valid for reads.
563 /// * Both `start` and `end` must point to an initialized value.
564 /// * Both `start` and `end` must point to the same allocated object and
565 /// must either be in bounds or at most one byte past the end of the
566 /// allocated object.
567 /// * Both `start` and `end` must be _derived from_ a pointer to the same
568 /// object.
569 /// * The distance between `start` and `end` must not overflow `isize`.
570 /// * The distance being in bounds must not rely on "wrapping around" the
571 /// address space.
572 ///
573 /// Note that callers may pass a pair of pointers such that `start >= end`.
574 /// In that case, `None` will always be returned.
575 #[inline]
576 pub unsafe fn rfind_raw(
577 &self,
578 start: *const u8,
579 end: *const u8,
580 ) -> Option<*const u8> {
581 if start >= end {
582 return None;
583 }
584 if end.distance(start) < __m128i::BYTES {
585 // SAFETY: We require the caller to pass valid start/end pointers.
586 return generic::rev_byte_by_byte(start, end, |b| {
587 b == self.0.needle1() || b == self.0.needle2()
588 });
589 }
590 // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
591 // Also, we've checked that our haystack is big enough to run on the
592 // vector routine. Pointer validity is caller's responsibility.
593 //
594 // See note in forward routine above for why we don't just call
595 // `self.0.rfind_raw` directly here.
596 self.rfind_raw_impl(start, end)
597 }
598
599 /// Execute a search using SSE2 vectors and routines.
600 ///
601 /// # Safety
602 ///
603 /// Same as [`Two::find_raw`], except the distance between `start` and
604 /// `end` must be at least the size of an SSE2 vector (in bytes).
605 ///
606 /// (The target feature safety obligation is automatically fulfilled by
607 /// virtue of being a method on `Two`, which can only be constructed
608 /// when it is safe to call `sse2` routines.)
609 #[target_feature(enable = "sse2")]
610 #[inline]
611 unsafe fn find_raw_impl(
612 &self,
613 start: *const u8,
614 end: *const u8,
615 ) -> Option<*const u8> {
616 self.0.find_raw(start, end)
617 }
618
619 /// Execute a search using SSE2 vectors and routines.
620 ///
621 /// # Safety
622 ///
623 /// Same as [`Two::rfind_raw`], except the distance between `start` and
624 /// `end` must be at least the size of an SSE2 vector (in bytes).
625 ///
626 /// (The target feature safety obligation is automatically fulfilled by
627 /// virtue of being a method on `Two`, which can only be constructed
628 /// when it is safe to call `sse2` routines.)
629 #[target_feature(enable = "sse2")]
630 #[inline]
631 unsafe fn rfind_raw_impl(
632 &self,
633 start: *const u8,
634 end: *const u8,
635 ) -> Option<*const u8> {
636 self.0.rfind_raw(start, end)
637 }
638
639 /// Returns an iterator over all occurrences of the needle bytes in the
640 /// given haystack.
641 ///
642 /// The iterator returned implements `DoubleEndedIterator`. This means it
643 /// can also be used to find occurrences in reverse order.
644 #[inline]
645 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
646 TwoIter { searcher: self, it: generic::Iter::new(haystack) }
647 }
648}
649
650/// An iterator over all occurrences of two possible bytes in a haystack.
651///
652/// This iterator implements `DoubleEndedIterator`, which means it can also be
653/// used to find occurrences in reverse order.
654///
655/// This iterator is created by the [`Two::iter`] method.
656///
657/// The lifetime parameters are as follows:
658///
659/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
660/// * `'h` refers to the lifetime of the haystack being searched.
661#[derive(Clone, Debug)]
662pub struct TwoIter<'a, 'h> {
663 searcher: &'a Two,
664 it: generic::Iter<'h>,
665}
666
667impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
668 type Item = usize;
669
670 #[inline]
671 fn next(&mut self) -> Option<usize> {
672 // SAFETY: We rely on the generic iterator to provide valid start
673 // and end pointers, but we guarantee that any pointer returned by
674 // 'find_raw' falls within the bounds of the start and end pointer.
675 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
676 }
677
678 #[inline]
679 fn size_hint(&self) -> (usize, Option<usize>) {
680 self.it.size_hint()
681 }
682}
683
684impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
685 #[inline]
686 fn next_back(&mut self) -> Option<usize> {
687 // SAFETY: We rely on the generic iterator to provide valid start
688 // and end pointers, but we guarantee that any pointer returned by
689 // 'rfind_raw' falls within the bounds of the start and end pointer.
690 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
691 }
692}
693
694impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
695
696/// Finds all occurrences of three bytes in a haystack.
697///
698/// That is, this reports matches of one of three possible bytes. For example,
699/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
700/// `0`, `2`, `3`, `4` and `5`.
701#[derive(Clone, Copy, Debug)]
702pub struct Three(generic::Three<__m128i>);
703
704impl Three {
705 /// Create a new searcher that finds occurrences of the needle bytes given.
706 ///
707 /// This particular searcher is specialized to use SSE2 vector instructions
708 /// that typically make it quite fast.
709 ///
710 /// If SSE2 is unavailable in the current environment, then `None` is
711 /// returned.
712 #[inline]
713 pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
714 if Three::is_available() {
715 // SAFETY: we check that sse2 is available above.
716 unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
717 } else {
718 None
719 }
720 }
721
722 /// Create a new finder specific to SSE2 vectors and routines without
723 /// checking that SSE2 is available.
724 ///
725 /// # Safety
726 ///
727 /// Callers must guarantee that it is safe to execute `sse2` instructions
728 /// in the current environment.
729 ///
730 /// Note that it is a common misconception that if one compiles for an
731 /// `x86_64` target, then they therefore automatically have access to SSE2
732 /// instructions. While this is almost always the case, it isn't true in
733 /// 100% of cases.
734 #[target_feature(enable = "sse2")]
735 #[inline]
736 pub unsafe fn new_unchecked(
737 needle1: u8,
738 needle2: u8,
739 needle3: u8,
740 ) -> Three {
741 Three(generic::Three::new(needle1, needle2, needle3))
742 }
743
744 /// Returns true when this implementation is available in the current
745 /// environment.
746 ///
747 /// When this is true, it is guaranteed that [`Three::new`] will return
748 /// a `Some` value. Similarly, when it is false, it is guaranteed that
749 /// `Three::new` will return a `None` value.
750 ///
751 /// Note also that for the lifetime of a single program, if this returns
752 /// true then it will always return true.
753 #[inline]
754 pub fn is_available() -> bool {
755 #[cfg(target_feature = "sse2")]
756 {
757 true
758 }
759 #[cfg(not(target_feature = "sse2"))]
760 {
761 false
762 }
763 }
764
765 /// Return the first occurrence of one of the needle bytes in the given
766 /// haystack. If no such occurrence exists, then `None` is returned.
767 ///
768 /// The occurrence is reported as an offset into `haystack`. Its maximum
769 /// value is `haystack.len() - 1`.
770 #[inline]
771 pub fn find(&self, haystack: &[u8]) -> Option<usize> {
772 // SAFETY: `find_raw` guarantees that if a pointer is returned, it
773 // falls within the bounds of the start and end pointers.
774 unsafe {
775 generic::search_slice_with_raw(haystack, |s, e| {
776 self.find_raw(s, e)
777 })
778 }
779 }
780
781 /// Return the last occurrence of one of the needle bytes in the given
782 /// haystack. If no such occurrence exists, then `None` is returned.
783 ///
784 /// The occurrence is reported as an offset into `haystack`. Its maximum
785 /// value is `haystack.len() - 1`.
786 #[inline]
787 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
788 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
789 // falls within the bounds of the start and end pointers.
790 unsafe {
791 generic::search_slice_with_raw(haystack, |s, e| {
792 self.rfind_raw(s, e)
793 })
794 }
795 }
796
797 /// Like `find`, but accepts and returns raw pointers.
798 ///
799 /// When a match is found, the pointer returned is guaranteed to be
800 /// `>= start` and `< end`.
801 ///
802 /// This routine is useful if you're already using raw pointers and would
803 /// like to avoid converting back to a slice before executing a search.
804 ///
805 /// # Safety
806 ///
807 /// * Both `start` and `end` must be valid for reads.
808 /// * Both `start` and `end` must point to an initialized value.
809 /// * Both `start` and `end` must point to the same allocated object and
810 /// must either be in bounds or at most one byte past the end of the
811 /// allocated object.
812 /// * Both `start` and `end` must be _derived from_ a pointer to the same
813 /// object.
814 /// * The distance between `start` and `end` must not overflow `isize`.
815 /// * The distance being in bounds must not rely on "wrapping around" the
816 /// address space.
817 ///
818 /// Note that callers may pass a pair of pointers such that `start >= end`.
819 /// In that case, `None` will always be returned.
820 #[inline]
821 pub unsafe fn find_raw(
822 &self,
823 start: *const u8,
824 end: *const u8,
825 ) -> Option<*const u8> {
826 if start >= end {
827 return None;
828 }
829 if end.distance(start) < __m128i::BYTES {
830 // SAFETY: We require the caller to pass valid start/end pointers.
831 return generic::fwd_byte_by_byte(start, end, |b| {
832 b == self.0.needle1()
833 || b == self.0.needle2()
834 || b == self.0.needle3()
835 });
836 }
837 // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
838 // Also, we've checked that our haystack is big enough to run on the
839 // vector routine. Pointer validity is caller's responsibility.
840 //
841 // Note that we could call `self.0.find_raw` directly here. But that
842 // means we'd have to annotate this routine with `target_feature`.
843 // Which is fine, because this routine is `unsafe` anyway and the
844 // `target_feature` obligation is met by virtue of building a `Three`.
845 // The real problem is that a routine with a `target_feature`
846 // annotation generally can't be inlined into caller code unless the
847 // caller code has the same target feature annotations. Which is maybe
848 // okay for SSE2, but we do the same thing for AVX2 where caller code
849 // probably usually doesn't have AVX2 enabled. That means that this
850 // routine can be inlined which will handle some of the short-haystack
851 // cases above without touching the architecture specific code.
852 self.find_raw_impl(start, end)
853 }
854
855 /// Like `rfind`, but accepts and returns raw pointers.
856 ///
857 /// When a match is found, the pointer returned is guaranteed to be
858 /// `>= start` and `< end`.
859 ///
860 /// This routine is useful if you're already using raw pointers and would
861 /// like to avoid converting back to a slice before executing a search.
862 ///
863 /// # Safety
864 ///
865 /// * Both `start` and `end` must be valid for reads.
866 /// * Both `start` and `end` must point to an initialized value.
867 /// * Both `start` and `end` must point to the same allocated object and
868 /// must either be in bounds or at most one byte past the end of the
869 /// allocated object.
870 /// * Both `start` and `end` must be _derived from_ a pointer to the same
871 /// object.
872 /// * The distance between `start` and `end` must not overflow `isize`.
873 /// * The distance being in bounds must not rely on "wrapping around" the
874 /// address space.
875 ///
876 /// Note that callers may pass a pair of pointers such that `start >= end`.
877 /// In that case, `None` will always be returned.
878 #[inline]
879 pub unsafe fn rfind_raw(
880 &self,
881 start: *const u8,
882 end: *const u8,
883 ) -> Option<*const u8> {
884 if start >= end {
885 return None;
886 }
887 if end.distance(start) < __m128i::BYTES {
888 // SAFETY: We require the caller to pass valid start/end pointers.
889 return generic::rev_byte_by_byte(start, end, |b| {
890 b == self.0.needle1()
891 || b == self.0.needle2()
892 || b == self.0.needle3()
893 });
894 }
895 // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
896 // Also, we've checked that our haystack is big enough to run on the
897 // vector routine. Pointer validity is caller's responsibility.
898 //
899 // See note in forward routine above for why we don't just call
900 // `self.0.rfind_raw` directly here.
901 self.rfind_raw_impl(start, end)
902 }
903
904 /// Execute a search using SSE2 vectors and routines.
905 ///
906 /// # Safety
907 ///
908 /// Same as [`Three::find_raw`], except the distance between `start` and
909 /// `end` must be at least the size of an SSE2 vector (in bytes).
910 ///
911 /// (The target feature safety obligation is automatically fulfilled by
912 /// virtue of being a method on `Three`, which can only be constructed
913 /// when it is safe to call `sse2` routines.)
914 #[target_feature(enable = "sse2")]
915 #[inline]
916 unsafe fn find_raw_impl(
917 &self,
918 start: *const u8,
919 end: *const u8,
920 ) -> Option<*const u8> {
921 self.0.find_raw(start, end)
922 }
923
924 /// Execute a search using SSE2 vectors and routines.
925 ///
926 /// # Safety
927 ///
928 /// Same as [`Three::rfind_raw`], except the distance between `start` and
929 /// `end` must be at least the size of an SSE2 vector (in bytes).
930 ///
931 /// (The target feature safety obligation is automatically fulfilled by
932 /// virtue of being a method on `Three`, which can only be constructed
933 /// when it is safe to call `sse2` routines.)
934 #[target_feature(enable = "sse2")]
935 #[inline]
936 unsafe fn rfind_raw_impl(
937 &self,
938 start: *const u8,
939 end: *const u8,
940 ) -> Option<*const u8> {
941 self.0.rfind_raw(start, end)
942 }
943
944 /// Returns an iterator over all occurrences of the needle byte in the
945 /// given haystack.
946 ///
947 /// The iterator returned implements `DoubleEndedIterator`. This means it
948 /// can also be used to find occurrences in reverse order.
949 #[inline]
950 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
951 ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
952 }
953}
954
955/// An iterator over all occurrences of three possible bytes in a haystack.
956///
957/// This iterator implements `DoubleEndedIterator`, which means it can also be
958/// used to find occurrences in reverse order.
959///
960/// This iterator is created by the [`Three::iter`] method.
961///
962/// The lifetime parameters are as follows:
963///
964/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
965/// * `'h` refers to the lifetime of the haystack being searched.
966#[derive(Clone, Debug)]
967pub struct ThreeIter<'a, 'h> {
968 searcher: &'a Three,
969 it: generic::Iter<'h>,
970}
971
972impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
973 type Item = usize;
974
975 #[inline]
976 fn next(&mut self) -> Option<usize> {
977 // SAFETY: We rely on the generic iterator to provide valid start
978 // and end pointers, but we guarantee that any pointer returned by
979 // 'find_raw' falls within the bounds of the start and end pointer.
980 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
981 }
982
983 #[inline]
984 fn size_hint(&self) -> (usize, Option<usize>) {
985 self.it.size_hint()
986 }
987}
988
989impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
990 #[inline]
991 fn next_back(&mut self) -> Option<usize> {
992 // SAFETY: We rely on the generic iterator to provide valid start
993 // and end pointers, but we guarantee that any pointer returned by
994 // 'rfind_raw' falls within the bounds of the start and end pointer.
995 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
996 }
997}
998
999impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
1000
1001#[cfg(test)]
1002mod tests {
1003 use super::*;
1004
1005 define_memchr_quickcheck!(super);
1006
1007 #[test]
1008 fn forward_one() {
1009 crate::tests::memchr::Runner::new(1).forward_iter(
1010 |haystack, needles| {
1011 Some(One::new(needles[0])?.iter(haystack).collect())
1012 },
1013 )
1014 }
1015
1016 #[test]
1017 fn reverse_one() {
1018 crate::tests::memchr::Runner::new(1).reverse_iter(
1019 |haystack, needles| {
1020 Some(One::new(needles[0])?.iter(haystack).rev().collect())
1021 },
1022 )
1023 }
1024
1025 #[test]
1026 fn count_one() {
1027 crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
1028 Some(One::new(needles[0])?.iter(haystack).count())
1029 })
1030 }
1031
1032 #[test]
1033 fn forward_two() {
1034 crate::tests::memchr::Runner::new(2).forward_iter(
1035 |haystack, needles| {
1036 let n1 = needles.get(0).copied()?;
1037 let n2 = needles.get(1).copied()?;
1038 Some(Two::new(n1, n2)?.iter(haystack).collect())
1039 },
1040 )
1041 }
1042
1043 #[test]
1044 fn reverse_two() {
1045 crate::tests::memchr::Runner::new(2).reverse_iter(
1046 |haystack, needles| {
1047 let n1 = needles.get(0).copied()?;
1048 let n2 = needles.get(1).copied()?;
1049 Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
1050 },
1051 )
1052 }
1053
1054 #[test]
1055 fn forward_three() {
1056 crate::tests::memchr::Runner::new(3).forward_iter(
1057 |haystack, needles| {
1058 let n1 = needles.get(0).copied()?;
1059 let n2 = needles.get(1).copied()?;
1060 let n3 = needles.get(2).copied()?;
1061 Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
1062 },
1063 )
1064 }
1065
1066 #[test]
1067 fn reverse_three() {
1068 crate::tests::memchr::Runner::new(3).reverse_iter(
1069 |haystack, needles| {
1070 let n1 = needles.get(0).copied()?;
1071 let n2 = needles.get(1).copied()?;
1072 let n3 = needles.get(2).copied()?;
1073 Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
1074 },
1075 )
1076 }
1077}
1078