1 | /*! |
2 | This module defines 128-bit vector implementations of `memchr` and friends. |
3 | |
4 | The main types in this module are [`One`], [`Two`] and [`Three`]. They are for |
5 | searching for one, two or three distinct bytes, respectively, in a haystack. |
6 | Each type also has corresponding double ended iterators. These searchers are |
7 | typically much faster than scalar routines accomplishing the same task. |
8 | |
9 | The `One` searcher also provides a [`One::count`] routine for efficiently |
10 | counting the number of times a single byte occurs in a haystack. This is |
11 | useful, for example, for counting the number of lines in a haystack. This |
12 | routine exists because it is usually faster, especially with a high match |
13 | count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its |
14 | `Iterator::count` implementation to use this routine.) |
15 | |
16 | Only one, two and three bytes are supported because three bytes is about |
17 | the point where one sees diminishing returns. Beyond this point and it's |
18 | probably (but not necessarily) better to just use a simple `[bool; 256]` array |
19 | or similar. However, it depends mightily on the specific work-load and the |
20 | expected match frequency. |
21 | */ |
22 | |
23 | use core::arch::x86_64::__m128i; |
24 | |
25 | use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; |
26 | |
27 | /// Finds all occurrences of a single byte in a haystack. |
28 | #[derive(Clone, Copy, Debug)] |
29 | pub struct One(generic::One<__m128i>); |
30 | |
31 | impl One { |
32 | /// Create a new searcher that finds occurrences of the needle byte given. |
33 | /// |
34 | /// This particular searcher is specialized to use SSE2 vector instructions |
35 | /// that typically make it quite fast. |
36 | /// |
37 | /// If SSE2 is unavailable in the current environment, then `None` is |
38 | /// returned. |
39 | #[inline ] |
40 | pub fn new(needle: u8) -> Option<One> { |
41 | if One::is_available() { |
42 | // SAFETY: we check that sse2 is available above. |
43 | unsafe { Some(One::new_unchecked(needle)) } |
44 | } else { |
45 | None |
46 | } |
47 | } |
48 | |
49 | /// Create a new finder specific to SSE2 vectors and routines without |
50 | /// checking that SSE2 is available. |
51 | /// |
52 | /// # Safety |
53 | /// |
54 | /// Callers must guarantee that it is safe to execute `sse2` instructions |
55 | /// in the current environment. |
56 | /// |
57 | /// Note that it is a common misconception that if one compiles for an |
58 | /// `x86_64` target, then they therefore automatically have access to SSE2 |
59 | /// instructions. While this is almost always the case, it isn't true in |
60 | /// 100% of cases. |
61 | #[target_feature (enable = "sse2" )] |
62 | #[inline ] |
63 | pub unsafe fn new_unchecked(needle: u8) -> One { |
64 | One(generic::One::new(needle)) |
65 | } |
66 | |
67 | /// Returns true when this implementation is available in the current |
68 | /// environment. |
69 | /// |
70 | /// When this is true, it is guaranteed that [`One::new`] will return |
71 | /// a `Some` value. Similarly, when it is false, it is guaranteed that |
72 | /// `One::new` will return a `None` value. |
73 | /// |
74 | /// Note also that for the lifetime of a single program, if this returns |
75 | /// true then it will always return true. |
76 | #[inline ] |
77 | pub fn is_available() -> bool { |
78 | #[cfg (target_feature = "sse2" )] |
79 | { |
80 | true |
81 | } |
82 | #[cfg (not(target_feature = "sse2" ))] |
83 | { |
84 | false |
85 | } |
86 | } |
87 | |
88 | /// Return the first occurrence of one of the needle bytes in the given |
89 | /// haystack. If no such occurrence exists, then `None` is returned. |
90 | /// |
91 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
92 | /// value is `haystack.len() - 1`. |
93 | #[inline ] |
94 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
95 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
96 | // falls within the bounds of the start and end pointers. |
97 | unsafe { |
98 | generic::search_slice_with_raw(haystack, |s, e| { |
99 | self.find_raw(s, e) |
100 | }) |
101 | } |
102 | } |
103 | |
104 | /// Return the last occurrence of one of the needle bytes in the given |
105 | /// haystack. If no such occurrence exists, then `None` is returned. |
106 | /// |
107 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
108 | /// value is `haystack.len() - 1`. |
109 | #[inline ] |
110 | pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { |
111 | // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it |
112 | // falls within the bounds of the start and end pointers. |
113 | unsafe { |
114 | generic::search_slice_with_raw(haystack, |s, e| { |
115 | self.rfind_raw(s, e) |
116 | }) |
117 | } |
118 | } |
119 | |
120 | /// Counts all occurrences of this byte in the given haystack. |
121 | #[inline ] |
122 | pub fn count(&self, haystack: &[u8]) -> usize { |
123 | // SAFETY: All of our pointers are derived directly from a borrowed |
124 | // slice, which is guaranteed to be valid. |
125 | unsafe { |
126 | let start = haystack.as_ptr(); |
127 | let end = start.add(haystack.len()); |
128 | self.count_raw(start, end) |
129 | } |
130 | } |
131 | |
132 | /// Like `find`, but accepts and returns raw pointers. |
133 | /// |
134 | /// When a match is found, the pointer returned is guaranteed to be |
135 | /// `>= start` and `< end`. |
136 | /// |
137 | /// This routine is useful if you're already using raw pointers and would |
138 | /// like to avoid converting back to a slice before executing a search. |
139 | /// |
140 | /// # Safety |
141 | /// |
142 | /// * Both `start` and `end` must be valid for reads. |
143 | /// * Both `start` and `end` must point to an initialized value. |
144 | /// * Both `start` and `end` must point to the same allocated object and |
145 | /// must either be in bounds or at most one byte past the end of the |
146 | /// allocated object. |
147 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
148 | /// object. |
149 | /// * The distance between `start` and `end` must not overflow `isize`. |
150 | /// * The distance being in bounds must not rely on "wrapping around" the |
151 | /// address space. |
152 | /// |
153 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
154 | /// In that case, `None` will always be returned. |
155 | #[inline ] |
156 | pub unsafe fn find_raw( |
157 | &self, |
158 | start: *const u8, |
159 | end: *const u8, |
160 | ) -> Option<*const u8> { |
161 | if start >= end { |
162 | return None; |
163 | } |
164 | if end.distance(start) < __m128i::BYTES { |
165 | // SAFETY: We require the caller to pass valid start/end pointers. |
166 | return generic::fwd_byte_by_byte(start, end, |b| { |
167 | b == self.0.needle1() |
168 | }); |
169 | } |
170 | // SAFETY: Building a `One` means it's safe to call 'sse2' routines. |
171 | // Also, we've checked that our haystack is big enough to run on the |
172 | // vector routine. Pointer validity is caller's responsibility. |
173 | // |
174 | // Note that we could call `self.0.find_raw` directly here. But that |
175 | // means we'd have to annotate this routine with `target_feature`. |
176 | // Which is fine, because this routine is `unsafe` anyway and the |
177 | // `target_feature` obligation is met by virtue of building a `One`. |
178 | // The real problem is that a routine with a `target_feature` |
179 | // annotation generally can't be inlined into caller code unless the |
180 | // caller code has the same target feature annotations. Which is maybe |
181 | // okay for SSE2, but we do the same thing for AVX2 where caller code |
182 | // probably usually doesn't have AVX2 enabled. That means that this |
183 | // routine can be inlined which will handle some of the short-haystack |
184 | // cases above without touching the architecture specific code. |
185 | self.find_raw_impl(start, end) |
186 | } |
187 | |
188 | /// Like `rfind`, but accepts and returns raw pointers. |
189 | /// |
190 | /// When a match is found, the pointer returned is guaranteed to be |
191 | /// `>= start` and `< end`. |
192 | /// |
193 | /// This routine is useful if you're already using raw pointers and would |
194 | /// like to avoid converting back to a slice before executing a search. |
195 | /// |
196 | /// # Safety |
197 | /// |
198 | /// * Both `start` and `end` must be valid for reads. |
199 | /// * Both `start` and `end` must point to an initialized value. |
200 | /// * Both `start` and `end` must point to the same allocated object and |
201 | /// must either be in bounds or at most one byte past the end of the |
202 | /// allocated object. |
203 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
204 | /// object. |
205 | /// * The distance between `start` and `end` must not overflow `isize`. |
206 | /// * The distance being in bounds must not rely on "wrapping around" the |
207 | /// address space. |
208 | /// |
209 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
210 | /// In that case, `None` will always be returned. |
211 | #[inline ] |
212 | pub unsafe fn rfind_raw( |
213 | &self, |
214 | start: *const u8, |
215 | end: *const u8, |
216 | ) -> Option<*const u8> { |
217 | if start >= end { |
218 | return None; |
219 | } |
220 | if end.distance(start) < __m128i::BYTES { |
221 | // SAFETY: We require the caller to pass valid start/end pointers. |
222 | return generic::rev_byte_by_byte(start, end, |b| { |
223 | b == self.0.needle1() |
224 | }); |
225 | } |
226 | // SAFETY: Building a `One` means it's safe to call 'sse2' routines. |
227 | // Also, we've checked that our haystack is big enough to run on the |
228 | // vector routine. Pointer validity is caller's responsibility. |
229 | // |
230 | // See note in forward routine above for why we don't just call |
231 | // `self.0.rfind_raw` directly here. |
232 | self.rfind_raw_impl(start, end) |
233 | } |
234 | |
235 | /// Counts all occurrences of this byte in the given haystack represented |
236 | /// by raw pointers. |
237 | /// |
238 | /// This routine is useful if you're already using raw pointers and would |
239 | /// like to avoid converting back to a slice before executing a search. |
240 | /// |
241 | /// # Safety |
242 | /// |
243 | /// * Both `start` and `end` must be valid for reads. |
244 | /// * Both `start` and `end` must point to an initialized value. |
245 | /// * Both `start` and `end` must point to the same allocated object and |
246 | /// must either be in bounds or at most one byte past the end of the |
247 | /// allocated object. |
248 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
249 | /// object. |
250 | /// * The distance between `start` and `end` must not overflow `isize`. |
251 | /// * The distance being in bounds must not rely on "wrapping around" the |
252 | /// address space. |
253 | /// |
254 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
255 | /// In that case, `0` will always be returned. |
256 | #[inline ] |
257 | pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { |
258 | if start >= end { |
259 | return 0; |
260 | } |
261 | if end.distance(start) < __m128i::BYTES { |
262 | // SAFETY: We require the caller to pass valid start/end pointers. |
263 | return generic::count_byte_by_byte(start, end, |b| { |
264 | b == self.0.needle1() |
265 | }); |
266 | } |
267 | // SAFETY: Building a `One` means it's safe to call 'sse2' routines. |
268 | // Also, we've checked that our haystack is big enough to run on the |
269 | // vector routine. Pointer validity is caller's responsibility. |
270 | self.count_raw_impl(start, end) |
271 | } |
272 | |
273 | /// Execute a search using SSE2 vectors and routines. |
274 | /// |
275 | /// # Safety |
276 | /// |
277 | /// Same as [`One::find_raw`], except the distance between `start` and |
278 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
279 | /// |
280 | /// (The target feature safety obligation is automatically fulfilled by |
281 | /// virtue of being a method on `One`, which can only be constructed |
282 | /// when it is safe to call `sse2` routines.) |
283 | #[target_feature (enable = "sse2" )] |
284 | #[inline ] |
285 | unsafe fn find_raw_impl( |
286 | &self, |
287 | start: *const u8, |
288 | end: *const u8, |
289 | ) -> Option<*const u8> { |
290 | self.0.find_raw(start, end) |
291 | } |
292 | |
293 | /// Execute a search using SSE2 vectors and routines. |
294 | /// |
295 | /// # Safety |
296 | /// |
297 | /// Same as [`One::rfind_raw`], except the distance between `start` and |
298 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
299 | /// |
300 | /// (The target feature safety obligation is automatically fulfilled by |
301 | /// virtue of being a method on `One`, which can only be constructed |
302 | /// when it is safe to call `sse2` routines.) |
303 | #[target_feature (enable = "sse2" )] |
304 | #[inline ] |
305 | unsafe fn rfind_raw_impl( |
306 | &self, |
307 | start: *const u8, |
308 | end: *const u8, |
309 | ) -> Option<*const u8> { |
310 | self.0.rfind_raw(start, end) |
311 | } |
312 | |
313 | /// Execute a count using SSE2 vectors and routines. |
314 | /// |
315 | /// # Safety |
316 | /// |
317 | /// Same as [`One::count_raw`], except the distance between `start` and |
318 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
319 | /// |
320 | /// (The target feature safety obligation is automatically fulfilled by |
321 | /// virtue of being a method on `One`, which can only be constructed |
322 | /// when it is safe to call `sse2` routines.) |
323 | #[target_feature (enable = "sse2" )] |
324 | #[inline ] |
325 | unsafe fn count_raw_impl( |
326 | &self, |
327 | start: *const u8, |
328 | end: *const u8, |
329 | ) -> usize { |
330 | self.0.count_raw(start, end) |
331 | } |
332 | |
333 | /// Returns an iterator over all occurrences of the needle byte in the |
334 | /// given haystack. |
335 | /// |
336 | /// The iterator returned implements `DoubleEndedIterator`. This means it |
337 | /// can also be used to find occurrences in reverse order. |
338 | #[inline ] |
339 | pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { |
340 | OneIter { searcher: self, it: generic::Iter::new(haystack) } |
341 | } |
342 | } |
343 | |
344 | /// An iterator over all occurrences of a single byte in a haystack. |
345 | /// |
346 | /// This iterator implements `DoubleEndedIterator`, which means it can also be |
347 | /// used to find occurrences in reverse order. |
348 | /// |
349 | /// This iterator is created by the [`One::iter`] method. |
350 | /// |
351 | /// The lifetime parameters are as follows: |
352 | /// |
353 | /// * `'a` refers to the lifetime of the underlying [`One`] searcher. |
354 | /// * `'h` refers to the lifetime of the haystack being searched. |
355 | #[derive(Clone, Debug)] |
356 | pub struct OneIter<'a, 'h> { |
357 | searcher: &'a One, |
358 | it: generic::Iter<'h>, |
359 | } |
360 | |
361 | impl<'a, 'h> Iterator for OneIter<'a, 'h> { |
362 | type Item = usize; |
363 | |
364 | #[inline ] |
365 | fn next(&mut self) -> Option<usize> { |
366 | // SAFETY: We rely on the generic iterator to provide valid start |
367 | // and end pointers, but we guarantee that any pointer returned by |
368 | // 'find_raw' falls within the bounds of the start and end pointer. |
369 | unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } |
370 | } |
371 | |
372 | #[inline ] |
373 | fn count(self) -> usize { |
374 | self.it.count(|s, e| { |
375 | // SAFETY: We rely on our generic iterator to return valid start |
376 | // and end pointers. |
377 | unsafe { self.searcher.count_raw(s, e) } |
378 | }) |
379 | } |
380 | |
381 | #[inline ] |
382 | fn size_hint(&self) -> (usize, Option<usize>) { |
383 | self.it.size_hint() |
384 | } |
385 | } |
386 | |
387 | impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { |
388 | #[inline ] |
389 | fn next_back(&mut self) -> Option<usize> { |
390 | // SAFETY: We rely on the generic iterator to provide valid start |
391 | // and end pointers, but we guarantee that any pointer returned by |
392 | // 'rfind_raw' falls within the bounds of the start and end pointer. |
393 | unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } |
394 | } |
395 | } |
396 | |
397 | impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} |
398 | |
399 | /// Finds all occurrences of two bytes in a haystack. |
400 | /// |
401 | /// That is, this reports matches of one of two possible bytes. For example, |
402 | /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, |
403 | /// `4` and `5`. |
404 | #[derive(Clone, Copy, Debug)] |
405 | pub struct Two(generic::Two<__m128i>); |
406 | |
407 | impl Two { |
408 | /// Create a new searcher that finds occurrences of the needle bytes given. |
409 | /// |
410 | /// This particular searcher is specialized to use SSE2 vector instructions |
411 | /// that typically make it quite fast. |
412 | /// |
413 | /// If SSE2 is unavailable in the current environment, then `None` is |
414 | /// returned. |
415 | #[inline ] |
416 | pub fn new(needle1: u8, needle2: u8) -> Option<Two> { |
417 | if Two::is_available() { |
418 | // SAFETY: we check that sse2 is available above. |
419 | unsafe { Some(Two::new_unchecked(needle1, needle2)) } |
420 | } else { |
421 | None |
422 | } |
423 | } |
424 | |
425 | /// Create a new finder specific to SSE2 vectors and routines without |
426 | /// checking that SSE2 is available. |
427 | /// |
428 | /// # Safety |
429 | /// |
430 | /// Callers must guarantee that it is safe to execute `sse2` instructions |
431 | /// in the current environment. |
432 | /// |
433 | /// Note that it is a common misconception that if one compiles for an |
434 | /// `x86_64` target, then they therefore automatically have access to SSE2 |
435 | /// instructions. While this is almost always the case, it isn't true in |
436 | /// 100% of cases. |
437 | #[target_feature (enable = "sse2" )] |
438 | #[inline ] |
439 | pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { |
440 | Two(generic::Two::new(needle1, needle2)) |
441 | } |
442 | |
443 | /// Returns true when this implementation is available in the current |
444 | /// environment. |
445 | /// |
446 | /// When this is true, it is guaranteed that [`Two::new`] will return |
447 | /// a `Some` value. Similarly, when it is false, it is guaranteed that |
448 | /// `Two::new` will return a `None` value. |
449 | /// |
450 | /// Note also that for the lifetime of a single program, if this returns |
451 | /// true then it will always return true. |
452 | #[inline ] |
453 | pub fn is_available() -> bool { |
454 | #[cfg (target_feature = "sse2" )] |
455 | { |
456 | true |
457 | } |
458 | #[cfg (not(target_feature = "sse2" ))] |
459 | { |
460 | false |
461 | } |
462 | } |
463 | |
464 | /// Return the first occurrence of one of the needle bytes in the given |
465 | /// haystack. If no such occurrence exists, then `None` is returned. |
466 | /// |
467 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
468 | /// value is `haystack.len() - 1`. |
469 | #[inline ] |
470 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
471 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
472 | // falls within the bounds of the start and end pointers. |
473 | unsafe { |
474 | generic::search_slice_with_raw(haystack, |s, e| { |
475 | self.find_raw(s, e) |
476 | }) |
477 | } |
478 | } |
479 | |
480 | /// Return the last occurrence of one of the needle bytes in the given |
481 | /// haystack. If no such occurrence exists, then `None` is returned. |
482 | /// |
483 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
484 | /// value is `haystack.len() - 1`. |
485 | #[inline ] |
486 | pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { |
487 | // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it |
488 | // falls within the bounds of the start and end pointers. |
489 | unsafe { |
490 | generic::search_slice_with_raw(haystack, |s, e| { |
491 | self.rfind_raw(s, e) |
492 | }) |
493 | } |
494 | } |
495 | |
496 | /// Like `find`, but accepts and returns raw pointers. |
497 | /// |
498 | /// When a match is found, the pointer returned is guaranteed to be |
499 | /// `>= start` and `< end`. |
500 | /// |
501 | /// This routine is useful if you're already using raw pointers and would |
502 | /// like to avoid converting back to a slice before executing a search. |
503 | /// |
504 | /// # Safety |
505 | /// |
506 | /// * Both `start` and `end` must be valid for reads. |
507 | /// * Both `start` and `end` must point to an initialized value. |
508 | /// * Both `start` and `end` must point to the same allocated object and |
509 | /// must either be in bounds or at most one byte past the end of the |
510 | /// allocated object. |
511 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
512 | /// object. |
513 | /// * The distance between `start` and `end` must not overflow `isize`. |
514 | /// * The distance being in bounds must not rely on "wrapping around" the |
515 | /// address space. |
516 | /// |
517 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
518 | /// In that case, `None` will always be returned. |
519 | #[inline ] |
520 | pub unsafe fn find_raw( |
521 | &self, |
522 | start: *const u8, |
523 | end: *const u8, |
524 | ) -> Option<*const u8> { |
525 | if start >= end { |
526 | return None; |
527 | } |
528 | if end.distance(start) < __m128i::BYTES { |
529 | // SAFETY: We require the caller to pass valid start/end pointers. |
530 | return generic::fwd_byte_by_byte(start, end, |b| { |
531 | b == self.0.needle1() || b == self.0.needle2() |
532 | }); |
533 | } |
534 | // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. |
535 | // Also, we've checked that our haystack is big enough to run on the |
536 | // vector routine. Pointer validity is caller's responsibility. |
537 | // |
538 | // Note that we could call `self.0.find_raw` directly here. But that |
539 | // means we'd have to annotate this routine with `target_feature`. |
540 | // Which is fine, because this routine is `unsafe` anyway and the |
541 | // `target_feature` obligation is met by virtue of building a `Two`. |
542 | // The real problem is that a routine with a `target_feature` |
543 | // annotation generally can't be inlined into caller code unless the |
544 | // caller code has the same target feature annotations. Which is maybe |
545 | // okay for SSE2, but we do the same thing for AVX2 where caller code |
546 | // probably usually doesn't have AVX2 enabled. That means that this |
547 | // routine can be inlined which will handle some of the short-haystack |
548 | // cases above without touching the architecture specific code. |
549 | self.find_raw_impl(start, end) |
550 | } |
551 | |
552 | /// Like `rfind`, but accepts and returns raw pointers. |
553 | /// |
554 | /// When a match is found, the pointer returned is guaranteed to be |
555 | /// `>= start` and `< end`. |
556 | /// |
557 | /// This routine is useful if you're already using raw pointers and would |
558 | /// like to avoid converting back to a slice before executing a search. |
559 | /// |
560 | /// # Safety |
561 | /// |
562 | /// * Both `start` and `end` must be valid for reads. |
563 | /// * Both `start` and `end` must point to an initialized value. |
564 | /// * Both `start` and `end` must point to the same allocated object and |
565 | /// must either be in bounds or at most one byte past the end of the |
566 | /// allocated object. |
567 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
568 | /// object. |
569 | /// * The distance between `start` and `end` must not overflow `isize`. |
570 | /// * The distance being in bounds must not rely on "wrapping around" the |
571 | /// address space. |
572 | /// |
573 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
574 | /// In that case, `None` will always be returned. |
575 | #[inline ] |
576 | pub unsafe fn rfind_raw( |
577 | &self, |
578 | start: *const u8, |
579 | end: *const u8, |
580 | ) -> Option<*const u8> { |
581 | if start >= end { |
582 | return None; |
583 | } |
584 | if end.distance(start) < __m128i::BYTES { |
585 | // SAFETY: We require the caller to pass valid start/end pointers. |
586 | return generic::rev_byte_by_byte(start, end, |b| { |
587 | b == self.0.needle1() || b == self.0.needle2() |
588 | }); |
589 | } |
590 | // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. |
591 | // Also, we've checked that our haystack is big enough to run on the |
592 | // vector routine. Pointer validity is caller's responsibility. |
593 | // |
594 | // See note in forward routine above for why we don't just call |
595 | // `self.0.rfind_raw` directly here. |
596 | self.rfind_raw_impl(start, end) |
597 | } |
598 | |
599 | /// Execute a search using SSE2 vectors and routines. |
600 | /// |
601 | /// # Safety |
602 | /// |
603 | /// Same as [`Two::find_raw`], except the distance between `start` and |
604 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
605 | /// |
606 | /// (The target feature safety obligation is automatically fulfilled by |
607 | /// virtue of being a method on `Two`, which can only be constructed |
608 | /// when it is safe to call `sse2` routines.) |
609 | #[target_feature (enable = "sse2" )] |
610 | #[inline ] |
611 | unsafe fn find_raw_impl( |
612 | &self, |
613 | start: *const u8, |
614 | end: *const u8, |
615 | ) -> Option<*const u8> { |
616 | self.0.find_raw(start, end) |
617 | } |
618 | |
619 | /// Execute a search using SSE2 vectors and routines. |
620 | /// |
621 | /// # Safety |
622 | /// |
623 | /// Same as [`Two::rfind_raw`], except the distance between `start` and |
624 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
625 | /// |
626 | /// (The target feature safety obligation is automatically fulfilled by |
627 | /// virtue of being a method on `Two`, which can only be constructed |
628 | /// when it is safe to call `sse2` routines.) |
629 | #[target_feature (enable = "sse2" )] |
630 | #[inline ] |
631 | unsafe fn rfind_raw_impl( |
632 | &self, |
633 | start: *const u8, |
634 | end: *const u8, |
635 | ) -> Option<*const u8> { |
636 | self.0.rfind_raw(start, end) |
637 | } |
638 | |
639 | /// Returns an iterator over all occurrences of the needle bytes in the |
640 | /// given haystack. |
641 | /// |
642 | /// The iterator returned implements `DoubleEndedIterator`. This means it |
643 | /// can also be used to find occurrences in reverse order. |
644 | #[inline ] |
645 | pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { |
646 | TwoIter { searcher: self, it: generic::Iter::new(haystack) } |
647 | } |
648 | } |
649 | |
650 | /// An iterator over all occurrences of two possible bytes in a haystack. |
651 | /// |
652 | /// This iterator implements `DoubleEndedIterator`, which means it can also be |
653 | /// used to find occurrences in reverse order. |
654 | /// |
655 | /// This iterator is created by the [`Two::iter`] method. |
656 | /// |
657 | /// The lifetime parameters are as follows: |
658 | /// |
659 | /// * `'a` refers to the lifetime of the underlying [`Two`] searcher. |
660 | /// * `'h` refers to the lifetime of the haystack being searched. |
661 | #[derive(Clone, Debug)] |
662 | pub struct TwoIter<'a, 'h> { |
663 | searcher: &'a Two, |
664 | it: generic::Iter<'h>, |
665 | } |
666 | |
667 | impl<'a, 'h> Iterator for TwoIter<'a, 'h> { |
668 | type Item = usize; |
669 | |
670 | #[inline ] |
671 | fn next(&mut self) -> Option<usize> { |
672 | // SAFETY: We rely on the generic iterator to provide valid start |
673 | // and end pointers, but we guarantee that any pointer returned by |
674 | // 'find_raw' falls within the bounds of the start and end pointer. |
675 | unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } |
676 | } |
677 | |
678 | #[inline ] |
679 | fn size_hint(&self) -> (usize, Option<usize>) { |
680 | self.it.size_hint() |
681 | } |
682 | } |
683 | |
684 | impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { |
685 | #[inline ] |
686 | fn next_back(&mut self) -> Option<usize> { |
687 | // SAFETY: We rely on the generic iterator to provide valid start |
688 | // and end pointers, but we guarantee that any pointer returned by |
689 | // 'rfind_raw' falls within the bounds of the start and end pointer. |
690 | unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } |
691 | } |
692 | } |
693 | |
694 | impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} |
695 | |
696 | /// Finds all occurrences of three bytes in a haystack. |
697 | /// |
698 | /// That is, this reports matches of one of three possible bytes. For example, |
699 | /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets |
700 | /// `0`, `2`, `3`, `4` and `5`. |
701 | #[derive(Clone, Copy, Debug)] |
702 | pub struct Three(generic::Three<__m128i>); |
703 | |
704 | impl Three { |
705 | /// Create a new searcher that finds occurrences of the needle bytes given. |
706 | /// |
707 | /// This particular searcher is specialized to use SSE2 vector instructions |
708 | /// that typically make it quite fast. |
709 | /// |
710 | /// If SSE2 is unavailable in the current environment, then `None` is |
711 | /// returned. |
712 | #[inline ] |
713 | pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { |
714 | if Three::is_available() { |
715 | // SAFETY: we check that sse2 is available above. |
716 | unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } |
717 | } else { |
718 | None |
719 | } |
720 | } |
721 | |
722 | /// Create a new finder specific to SSE2 vectors and routines without |
723 | /// checking that SSE2 is available. |
724 | /// |
725 | /// # Safety |
726 | /// |
727 | /// Callers must guarantee that it is safe to execute `sse2` instructions |
728 | /// in the current environment. |
729 | /// |
730 | /// Note that it is a common misconception that if one compiles for an |
731 | /// `x86_64` target, then they therefore automatically have access to SSE2 |
732 | /// instructions. While this is almost always the case, it isn't true in |
733 | /// 100% of cases. |
734 | #[target_feature (enable = "sse2" )] |
735 | #[inline ] |
736 | pub unsafe fn new_unchecked( |
737 | needle1: u8, |
738 | needle2: u8, |
739 | needle3: u8, |
740 | ) -> Three { |
741 | Three(generic::Three::new(needle1, needle2, needle3)) |
742 | } |
743 | |
744 | /// Returns true when this implementation is available in the current |
745 | /// environment. |
746 | /// |
747 | /// When this is true, it is guaranteed that [`Three::new`] will return |
748 | /// a `Some` value. Similarly, when it is false, it is guaranteed that |
749 | /// `Three::new` will return a `None` value. |
750 | /// |
751 | /// Note also that for the lifetime of a single program, if this returns |
752 | /// true then it will always return true. |
753 | #[inline ] |
754 | pub fn is_available() -> bool { |
755 | #[cfg (target_feature = "sse2" )] |
756 | { |
757 | true |
758 | } |
759 | #[cfg (not(target_feature = "sse2" ))] |
760 | { |
761 | false |
762 | } |
763 | } |
764 | |
765 | /// Return the first occurrence of one of the needle bytes in the given |
766 | /// haystack. If no such occurrence exists, then `None` is returned. |
767 | /// |
768 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
769 | /// value is `haystack.len() - 1`. |
770 | #[inline ] |
771 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
772 | // SAFETY: `find_raw` guarantees that if a pointer is returned, it |
773 | // falls within the bounds of the start and end pointers. |
774 | unsafe { |
775 | generic::search_slice_with_raw(haystack, |s, e| { |
776 | self.find_raw(s, e) |
777 | }) |
778 | } |
779 | } |
780 | |
781 | /// Return the last occurrence of one of the needle bytes in the given |
782 | /// haystack. If no such occurrence exists, then `None` is returned. |
783 | /// |
784 | /// The occurrence is reported as an offset into `haystack`. Its maximum |
785 | /// value is `haystack.len() - 1`. |
786 | #[inline ] |
787 | pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { |
788 | // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it |
789 | // falls within the bounds of the start and end pointers. |
790 | unsafe { |
791 | generic::search_slice_with_raw(haystack, |s, e| { |
792 | self.rfind_raw(s, e) |
793 | }) |
794 | } |
795 | } |
796 | |
797 | /// Like `find`, but accepts and returns raw pointers. |
798 | /// |
799 | /// When a match is found, the pointer returned is guaranteed to be |
800 | /// `>= start` and `< end`. |
801 | /// |
802 | /// This routine is useful if you're already using raw pointers and would |
803 | /// like to avoid converting back to a slice before executing a search. |
804 | /// |
805 | /// # Safety |
806 | /// |
807 | /// * Both `start` and `end` must be valid for reads. |
808 | /// * Both `start` and `end` must point to an initialized value. |
809 | /// * Both `start` and `end` must point to the same allocated object and |
810 | /// must either be in bounds or at most one byte past the end of the |
811 | /// allocated object. |
812 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
813 | /// object. |
814 | /// * The distance between `start` and `end` must not overflow `isize`. |
815 | /// * The distance being in bounds must not rely on "wrapping around" the |
816 | /// address space. |
817 | /// |
818 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
819 | /// In that case, `None` will always be returned. |
820 | #[inline ] |
821 | pub unsafe fn find_raw( |
822 | &self, |
823 | start: *const u8, |
824 | end: *const u8, |
825 | ) -> Option<*const u8> { |
826 | if start >= end { |
827 | return None; |
828 | } |
829 | if end.distance(start) < __m128i::BYTES { |
830 | // SAFETY: We require the caller to pass valid start/end pointers. |
831 | return generic::fwd_byte_by_byte(start, end, |b| { |
832 | b == self.0.needle1() |
833 | || b == self.0.needle2() |
834 | || b == self.0.needle3() |
835 | }); |
836 | } |
837 | // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. |
838 | // Also, we've checked that our haystack is big enough to run on the |
839 | // vector routine. Pointer validity is caller's responsibility. |
840 | // |
841 | // Note that we could call `self.0.find_raw` directly here. But that |
842 | // means we'd have to annotate this routine with `target_feature`. |
843 | // Which is fine, because this routine is `unsafe` anyway and the |
844 | // `target_feature` obligation is met by virtue of building a `Three`. |
845 | // The real problem is that a routine with a `target_feature` |
846 | // annotation generally can't be inlined into caller code unless the |
847 | // caller code has the same target feature annotations. Which is maybe |
848 | // okay for SSE2, but we do the same thing for AVX2 where caller code |
849 | // probably usually doesn't have AVX2 enabled. That means that this |
850 | // routine can be inlined which will handle some of the short-haystack |
851 | // cases above without touching the architecture specific code. |
852 | self.find_raw_impl(start, end) |
853 | } |
854 | |
855 | /// Like `rfind`, but accepts and returns raw pointers. |
856 | /// |
857 | /// When a match is found, the pointer returned is guaranteed to be |
858 | /// `>= start` and `< end`. |
859 | /// |
860 | /// This routine is useful if you're already using raw pointers and would |
861 | /// like to avoid converting back to a slice before executing a search. |
862 | /// |
863 | /// # Safety |
864 | /// |
865 | /// * Both `start` and `end` must be valid for reads. |
866 | /// * Both `start` and `end` must point to an initialized value. |
867 | /// * Both `start` and `end` must point to the same allocated object and |
868 | /// must either be in bounds or at most one byte past the end of the |
869 | /// allocated object. |
870 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
871 | /// object. |
872 | /// * The distance between `start` and `end` must not overflow `isize`. |
873 | /// * The distance being in bounds must not rely on "wrapping around" the |
874 | /// address space. |
875 | /// |
876 | /// Note that callers may pass a pair of pointers such that `start >= end`. |
877 | /// In that case, `None` will always be returned. |
878 | #[inline ] |
879 | pub unsafe fn rfind_raw( |
880 | &self, |
881 | start: *const u8, |
882 | end: *const u8, |
883 | ) -> Option<*const u8> { |
884 | if start >= end { |
885 | return None; |
886 | } |
887 | if end.distance(start) < __m128i::BYTES { |
888 | // SAFETY: We require the caller to pass valid start/end pointers. |
889 | return generic::rev_byte_by_byte(start, end, |b| { |
890 | b == self.0.needle1() |
891 | || b == self.0.needle2() |
892 | || b == self.0.needle3() |
893 | }); |
894 | } |
895 | // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. |
896 | // Also, we've checked that our haystack is big enough to run on the |
897 | // vector routine. Pointer validity is caller's responsibility. |
898 | // |
899 | // See note in forward routine above for why we don't just call |
900 | // `self.0.rfind_raw` directly here. |
901 | self.rfind_raw_impl(start, end) |
902 | } |
903 | |
904 | /// Execute a search using SSE2 vectors and routines. |
905 | /// |
906 | /// # Safety |
907 | /// |
908 | /// Same as [`Three::find_raw`], except the distance between `start` and |
909 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
910 | /// |
911 | /// (The target feature safety obligation is automatically fulfilled by |
912 | /// virtue of being a method on `Three`, which can only be constructed |
913 | /// when it is safe to call `sse2` routines.) |
914 | #[target_feature (enable = "sse2" )] |
915 | #[inline ] |
916 | unsafe fn find_raw_impl( |
917 | &self, |
918 | start: *const u8, |
919 | end: *const u8, |
920 | ) -> Option<*const u8> { |
921 | self.0.find_raw(start, end) |
922 | } |
923 | |
924 | /// Execute a search using SSE2 vectors and routines. |
925 | /// |
926 | /// # Safety |
927 | /// |
928 | /// Same as [`Three::rfind_raw`], except the distance between `start` and |
929 | /// `end` must be at least the size of an SSE2 vector (in bytes). |
930 | /// |
931 | /// (The target feature safety obligation is automatically fulfilled by |
932 | /// virtue of being a method on `Three`, which can only be constructed |
933 | /// when it is safe to call `sse2` routines.) |
934 | #[target_feature (enable = "sse2" )] |
935 | #[inline ] |
936 | unsafe fn rfind_raw_impl( |
937 | &self, |
938 | start: *const u8, |
939 | end: *const u8, |
940 | ) -> Option<*const u8> { |
941 | self.0.rfind_raw(start, end) |
942 | } |
943 | |
944 | /// Returns an iterator over all occurrences of the needle byte in the |
945 | /// given haystack. |
946 | /// |
947 | /// The iterator returned implements `DoubleEndedIterator`. This means it |
948 | /// can also be used to find occurrences in reverse order. |
949 | #[inline ] |
950 | pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { |
951 | ThreeIter { searcher: self, it: generic::Iter::new(haystack) } |
952 | } |
953 | } |
954 | |
955 | /// An iterator over all occurrences of three possible bytes in a haystack. |
956 | /// |
957 | /// This iterator implements `DoubleEndedIterator`, which means it can also be |
958 | /// used to find occurrences in reverse order. |
959 | /// |
960 | /// This iterator is created by the [`Three::iter`] method. |
961 | /// |
962 | /// The lifetime parameters are as follows: |
963 | /// |
964 | /// * `'a` refers to the lifetime of the underlying [`Three`] searcher. |
965 | /// * `'h` refers to the lifetime of the haystack being searched. |
966 | #[derive(Clone, Debug)] |
967 | pub struct ThreeIter<'a, 'h> { |
968 | searcher: &'a Three, |
969 | it: generic::Iter<'h>, |
970 | } |
971 | |
972 | impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { |
973 | type Item = usize; |
974 | |
975 | #[inline ] |
976 | fn next(&mut self) -> Option<usize> { |
977 | // SAFETY: We rely on the generic iterator to provide valid start |
978 | // and end pointers, but we guarantee that any pointer returned by |
979 | // 'find_raw' falls within the bounds of the start and end pointer. |
980 | unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } |
981 | } |
982 | |
983 | #[inline ] |
984 | fn size_hint(&self) -> (usize, Option<usize>) { |
985 | self.it.size_hint() |
986 | } |
987 | } |
988 | |
989 | impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { |
990 | #[inline ] |
991 | fn next_back(&mut self) -> Option<usize> { |
992 | // SAFETY: We rely on the generic iterator to provide valid start |
993 | // and end pointers, but we guarantee that any pointer returned by |
994 | // 'rfind_raw' falls within the bounds of the start and end pointer. |
995 | unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } |
996 | } |
997 | } |
998 | |
999 | impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} |
1000 | |
1001 | #[cfg (test)] |
1002 | mod tests { |
1003 | use super::*; |
1004 | |
1005 | define_memchr_quickcheck!(super); |
1006 | |
1007 | #[test] |
1008 | fn forward_one() { |
1009 | crate::tests::memchr::Runner::new(1).forward_iter( |
1010 | |haystack, needles| { |
1011 | Some(One::new(needles[0])?.iter(haystack).collect()) |
1012 | }, |
1013 | ) |
1014 | } |
1015 | |
1016 | #[test] |
1017 | fn reverse_one() { |
1018 | crate::tests::memchr::Runner::new(1).reverse_iter( |
1019 | |haystack, needles| { |
1020 | Some(One::new(needles[0])?.iter(haystack).rev().collect()) |
1021 | }, |
1022 | ) |
1023 | } |
1024 | |
1025 | #[test] |
1026 | fn count_one() { |
1027 | crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { |
1028 | Some(One::new(needles[0])?.iter(haystack).count()) |
1029 | }) |
1030 | } |
1031 | |
1032 | #[test] |
1033 | fn forward_two() { |
1034 | crate::tests::memchr::Runner::new(2).forward_iter( |
1035 | |haystack, needles| { |
1036 | let n1 = needles.get(0).copied()?; |
1037 | let n2 = needles.get(1).copied()?; |
1038 | Some(Two::new(n1, n2)?.iter(haystack).collect()) |
1039 | }, |
1040 | ) |
1041 | } |
1042 | |
1043 | #[test] |
1044 | fn reverse_two() { |
1045 | crate::tests::memchr::Runner::new(2).reverse_iter( |
1046 | |haystack, needles| { |
1047 | let n1 = needles.get(0).copied()?; |
1048 | let n2 = needles.get(1).copied()?; |
1049 | Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) |
1050 | }, |
1051 | ) |
1052 | } |
1053 | |
1054 | #[test] |
1055 | fn forward_three() { |
1056 | crate::tests::memchr::Runner::new(3).forward_iter( |
1057 | |haystack, needles| { |
1058 | let n1 = needles.get(0).copied()?; |
1059 | let n2 = needles.get(1).copied()?; |
1060 | let n3 = needles.get(2).copied()?; |
1061 | Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) |
1062 | }, |
1063 | ) |
1064 | } |
1065 | |
1066 | #[test] |
1067 | fn reverse_three() { |
1068 | crate::tests::memchr::Runner::new(3).reverse_iter( |
1069 | |haystack, needles| { |
1070 | let n1 = needles.get(0).copied()?; |
1071 | let n2 = needles.get(1).copied()?; |
1072 | let n3 = needles.get(2).copied()?; |
1073 | Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) |
1074 | }, |
1075 | ) |
1076 | } |
1077 | } |
1078 | |