1 | /*! |
2 | Generic crate-internal routines for the `memchr` family of functions. |
3 | */ |
4 | |
5 | // What follows is a vector algorithm generic over the specific vector |
6 | // type to detect the position of one, two or three needles in a haystack. |
7 | // From what I know, this is a "classic" algorithm, although I don't |
8 | // believe it has been published in any peer reviewed journal. I believe |
9 | // it can be found in places like glibc and Go's standard library. It |
10 | // appears to be well known and is elaborated on in more detail here: |
11 | // https://gms.tf/stdfind-and-memchr-optimizations.html |
12 | // |
13 | // While the routine below is fairly long and perhaps intimidating, the basic |
// idea is actually very simple and can be expressed straightforwardly in
// pseudo code. The pseudo code below is written for 128 bit vectors, but the
// actual code below works for anything that implements the Vector trait.
17 | // |
18 | // needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1 |
19 | // // Note: shift amount is in bytes |
//
//   i = 0
21 | // while i <= haystack.len() - 16: |
22 | // // A 16 byte vector. Each byte in chunk corresponds to a byte in |
23 | // // the haystack. |
24 | // chunk = haystack[i:i+16] |
25 | // // Compare bytes in needle with bytes in chunk. The result is a 16 |
26 | // // byte chunk where each byte is 0xFF if the corresponding bytes |
27 | // // in needle and chunk were equal, or 0x00 otherwise. |
28 | // eqs = cmpeq(needle, chunk) |
29 | // // Return a 32 bit integer where the most significant 16 bits |
30 | // // are always 0 and the lower 16 bits correspond to whether the |
//     // most significant bit in the corresponding byte in `eqs` is set.
32 | // // In other words, `mask as u16` has bit i set if and only if |
33 | // // needle[i] == chunk[i]. |
34 | // mask = movemask(eqs) |
35 | // |
36 | // // Mask is 0 if there is no match, and non-zero otherwise. |
37 | // if mask != 0: |
38 | // // trailing_zeros tells us the position of the least significant |
39 | // // bit that is set. |
40 | // return i + trailing_zeros(mask) |
//
//     i += 16
//
42 | // // haystack length may not be a multiple of 16, so search the rest. |
43 | // while i < haystack.len(): |
44 | // if haystack[i] == n1: |
45 | // return i |
//     i += 1
//
47 | // // No match found. |
48 | // return NULL |
49 | // |
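// As a rough illustration only (the generic code below never names any
// particular intrinsic; this is just how the pseudo code might look when
// specialized to SSE2, inside an `unsafe` routine compiled with SSE2
// enabled), the loop body maps onto x86-64 intrinsics roughly like this:
//
//     use core::arch::x86_64::*;
//
//     let needle = _mm_set1_epi8(n1 as i8);
//     let chunk = _mm_loadu_si128(haystack.as_ptr().add(i).cast());
//     let eqs = _mm_cmpeq_epi8(needle, chunk);
//     let mask = _mm_movemask_epi8(eqs);
//     if mask != 0 {
//         return Some(i + (mask as u32).trailing_zeros() as usize);
//     }
//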
// In fact, we could loosely translate the pseudo code above to Rust
// line-for-line and it would be a pretty fast algorithm. But we pull out all
// the stops to go as fast as possible:
53 | // |
54 | // 1. We use aligned loads. That is, we do some finagling to make sure our |
55 | // primary loop not only proceeds in increments of 16 bytes, but that |
56 | // the address of haystack's pointer that we dereference is aligned to |
// 16 bytes. 16 is a magic number here because it is the size of an SSE2
// 128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
59 | // Therefore, to get aligned loads, our pointer's address must be evenly |
60 | // divisible by 16. |
61 | // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's |
62 | // kind of like loop unrolling, but we combine the equality comparisons |
63 | // using a vector OR such that we only need to extract a single mask to |
64 | // determine whether a match exists or not. If so, then we do some |
65 | // book-keeping to determine the precise location but otherwise mush on. |
66 | // 3. We use our "chunk" comparison routine in as many places as possible, |
67 | // even if it means using unaligned loads. In particular, if haystack |
68 | // starts with an unaligned address, then we do an unaligned load to |
69 | // search the first 16 bytes. We then start our primary loop at the |
70 | // smallest subsequent aligned address, which will actually overlap with |
71 | // previously searched bytes. But we're OK with that. We do a similar |
72 | // dance at the end of our primary loop. Finally, to avoid a |
73 | // byte-at-a-time loop at the end, we do a final 16 byte unaligned load |
74 | // that may overlap with a previous load. This is OK because it converts |
75 | // a loop into a small number of very fast vector instructions. The overlap |
76 | // is OK because we know the place where the overlap occurs does not |
// contain a match. (This pointer arithmetic is sketched just below.)
78 | // |
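// As a rough sketch of the pointer arithmetic behind points (1) and (3),
// again assuming a 16 byte vector (the code below expresses this generically
// through the `Vector` and `Pointer` traits):
//
//     // The first 16-byte aligned address strictly greater than `start`.
//     // It may overlap with the unaligned chunk already searched at
//     // `start`, and that's fine.
//     let aligned_start = start.add(16 - (start as usize & 15));
//     // The final, possibly overlapping, unaligned load covers exactly the
//     // last 16 bytes of the haystack.
//     let tail = end.sub(16);
//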
// And that's pretty much all there is to it. Note that since the code below
// is generic and since it's meant to be inlined into routines with a
81 | // `#[target_feature(enable = "...")]` annotation, we must mark all routines as |
82 | // both unsafe and `#[inline(always)]`. |
83 | // |
84 | // The fact that the code below is generic does somewhat inhibit us. For |
// example, I've noticed that introducing an uninlineable `#[cold]` function to
86 | // handle the match case in the loop generates tighter assembly, but there is |
87 | // no way to do this in the generic code below because the generic code doesn't |
// know what `target_feature` annotation to apply to the uninlineable function.
89 | // We could make such functions part of the `Vector` trait, but we instead live |
90 | // with the slightly sub-optimal codegen for now since it doesn't seem to have |
91 | // a noticeable perf difference. |
92 | |
93 | use crate::{ |
94 | ext::Pointer, |
95 | vector::{MoveMask, Vector}, |
96 | }; |
97 | |
98 | /// Finds all occurrences of a single byte in a haystack. |
99 | #[derive(Clone, Copy, Debug)] |
100 | pub(crate) struct One<V> { |
101 | s1: u8, |
102 | v1: V, |
103 | } |
104 | |
105 | impl<V: Vector> One<V> { |
/// The number of bytes we examine for each iteration of our search loop.
107 | const LOOP_SIZE: usize = 4 * V::BYTES; |
108 | |
109 | /// Create a new searcher that finds occurrences of the byte given. |
#[inline(always)]
111 | pub(crate) unsafe fn new(needle: u8) -> One<V> { |
112 | One { s1: needle, v1: V::splat(needle) } |
113 | } |
114 | |
115 | /// Returns the needle given to `One::new`. |
#[inline(always)]
117 | pub(crate) fn needle1(&self) -> u8 { |
118 | self.s1 |
119 | } |
120 | |
121 | /// Return a pointer to the first occurrence of the needle in the given |
122 | /// haystack. If no such occurrence exists, then `None` is returned. |
123 | /// |
124 | /// When a match is found, the pointer returned is guaranteed to be |
125 | /// `>= start` and `< end`. |
126 | /// |
127 | /// # Safety |
128 | /// |
129 | /// * It must be the case that `start < end` and that the distance between |
130 | /// them is at least equal to `V::BYTES`. That is, it must always be valid |
131 | /// to do at least an unaligned load of `V` at `start`. |
132 | /// * Both `start` and `end` must be valid for reads. |
133 | /// * Both `start` and `end` must point to an initialized value. |
134 | /// * Both `start` and `end` must point to the same allocated object and |
135 | /// must either be in bounds or at most one byte past the end of the |
136 | /// allocated object. |
137 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
138 | /// object. |
139 | /// * The distance between `start` and `end` must not overflow `isize`. |
140 | /// * The distance being in bounds must not rely on "wrapping around" the |
141 | /// address space. |
#[inline(always)]
143 | pub(crate) unsafe fn find_raw( |
144 | &self, |
145 | start: *const u8, |
146 | end: *const u8, |
147 | ) -> Option<*const u8> { |
148 | // If we want to support vectors bigger than 256 bits, we probably |
149 | // need to move up to using a u64 for the masks used below. Currently |
150 | // they are 32 bits, which means we're SOL for vectors that need masks |
151 | // bigger than 32 bits. Overall unclear until there's a use case. |
debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
153 | |
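// `first_offset` reports the position of the least significant set bit in a
// (non-zero) mask, i.e., the offset of the first matching byte in the
// corresponding chunk.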
154 | let topos = V::Mask::first_offset; |
155 | let len = end.distance(start); |
156 | debug_assert!( |
157 | len >= V::BYTES, |
"haystack has length {}, but must be at least {}",
159 | len, |
160 | V::BYTES |
161 | ); |
162 | |
163 | // Search a possibly unaligned chunk at `start`. This covers any part |
164 | // of the haystack prior to where aligned loads can start. |
165 | if let Some(cur) = self.search_chunk(start, topos) { |
166 | return Some(cur); |
167 | } |
168 | // Set `cur` to the first V-aligned pointer greater than `start`. |
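// (`V::ALIGN` is used as an alignment mask here, so this rounds `start` up
// to the next multiple of `V::BYTES` and always advances by at least one
// byte. Skipping ahead is fine because the chunk at `start` was just
// searched above.)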
169 | let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); |
170 | debug_assert!(cur > start && end.sub(V::BYTES) >= start); |
171 | if len >= Self::LOOP_SIZE { |
172 | while cur <= end.sub(Self::LOOP_SIZE) { |
173 | debug_assert_eq!(0, cur.as_usize() % V::BYTES); |
174 | |
175 | let a = V::load_aligned(cur); |
176 | let b = V::load_aligned(cur.add(1 * V::BYTES)); |
177 | let c = V::load_aligned(cur.add(2 * V::BYTES)); |
178 | let d = V::load_aligned(cur.add(3 * V::BYTES)); |
179 | let eqa = self.v1.cmpeq(a); |
180 | let eqb = self.v1.cmpeq(b); |
181 | let eqc = self.v1.cmpeq(c); |
182 | let eqd = self.v1.cmpeq(d); |
183 | let or1 = eqa.or(eqb); |
184 | let or2 = eqc.or(eqd); |
185 | let or3 = or1.or(or2); |
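// Check the OR of all four comparisons once so that the common
// no-match case costs only a single test. The individual masks below
// are extracted only after we know that at least one byte in this
// block matched.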
186 | if or3.movemask_will_have_non_zero() { |
187 | let mask = eqa.movemask(); |
188 | if mask.has_non_zero() { |
189 | return Some(cur.add(topos(mask))); |
190 | } |
191 | |
192 | let mask = eqb.movemask(); |
193 | if mask.has_non_zero() { |
194 | return Some(cur.add(1 * V::BYTES).add(topos(mask))); |
195 | } |
196 | |
197 | let mask = eqc.movemask(); |
198 | if mask.has_non_zero() { |
199 | return Some(cur.add(2 * V::BYTES).add(topos(mask))); |
200 | } |
201 | |
202 | let mask = eqd.movemask(); |
203 | debug_assert!(mask.has_non_zero()); |
204 | return Some(cur.add(3 * V::BYTES).add(topos(mask))); |
205 | } |
206 | cur = cur.add(Self::LOOP_SIZE); |
207 | } |
208 | } |
209 | // Handle any leftovers after the aligned loop above. We use unaligned |
210 | // loads here, but I believe we are guaranteed that they are aligned |
211 | // since `cur` is aligned. |
212 | while cur <= end.sub(V::BYTES) { |
213 | debug_assert!(end.distance(cur) >= V::BYTES); |
214 | if let Some(cur) = self.search_chunk(cur, topos) { |
215 | return Some(cur); |
216 | } |
217 | cur = cur.add(V::BYTES); |
218 | } |
219 | // Finally handle any remaining bytes less than the size of V. In this |
220 | // case, our pointer may indeed be unaligned and the load may overlap |
221 | // with the previous one. But that's okay since we know the previous |
222 | // load didn't lead to a match (otherwise we wouldn't be here). |
223 | if cur < end { |
224 | debug_assert!(end.distance(cur) < V::BYTES); |
225 | cur = cur.sub(V::BYTES - end.distance(cur)); |
226 | debug_assert_eq!(end.distance(cur), V::BYTES); |
227 | return self.search_chunk(cur, topos); |
228 | } |
229 | None |
230 | } |
231 | |
232 | /// Return a pointer to the last occurrence of the needle in the given |
233 | /// haystack. If no such occurrence exists, then `None` is returned. |
234 | /// |
235 | /// When a match is found, the pointer returned is guaranteed to be |
236 | /// `>= start` and `< end`. |
237 | /// |
238 | /// # Safety |
239 | /// |
240 | /// * It must be the case that `start < end` and that the distance between |
241 | /// them is at least equal to `V::BYTES`. That is, it must always be valid |
242 | /// to do at least an unaligned load of `V` at `start`. |
243 | /// * Both `start` and `end` must be valid for reads. |
244 | /// * Both `start` and `end` must point to an initialized value. |
245 | /// * Both `start` and `end` must point to the same allocated object and |
246 | /// must either be in bounds or at most one byte past the end of the |
247 | /// allocated object. |
248 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
249 | /// object. |
250 | /// * The distance between `start` and `end` must not overflow `isize`. |
251 | /// * The distance being in bounds must not rely on "wrapping around" the |
252 | /// address space. |
#[inline(always)]
254 | pub(crate) unsafe fn rfind_raw( |
255 | &self, |
256 | start: *const u8, |
257 | end: *const u8, |
258 | ) -> Option<*const u8> { |
259 | // If we want to support vectors bigger than 256 bits, we probably |
260 | // need to move up to using a u64 for the masks used below. Currently |
261 | // they are 32 bits, which means we're SOL for vectors that need masks |
262 | // bigger than 32 bits. Overall unclear until there's a use case. |
debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
264 | |
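// `last_offset` reports the position of the most significant set bit in a
// (non-zero) mask, i.e., the offset of the last matching byte in the
// corresponding chunk.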
265 | let topos = V::Mask::last_offset; |
266 | let len = end.distance(start); |
267 | debug_assert!( |
268 | len >= V::BYTES, |
"haystack has length {}, but must be at least {}",
270 | len, |
271 | V::BYTES |
272 | ); |
273 | |
274 | if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { |
275 | return Some(cur); |
276 | } |
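// Set `cur` to the last V-aligned pointer less than or equal to `end`.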
277 | let mut cur = end.sub(end.as_usize() & V::ALIGN); |
278 | debug_assert!(start <= cur && cur <= end); |
279 | if len >= Self::LOOP_SIZE { |
280 | while cur >= start.add(Self::LOOP_SIZE) { |
281 | debug_assert_eq!(0, cur.as_usize() % V::BYTES); |
282 | |
283 | cur = cur.sub(Self::LOOP_SIZE); |
284 | let a = V::load_aligned(cur); |
285 | let b = V::load_aligned(cur.add(1 * V::BYTES)); |
286 | let c = V::load_aligned(cur.add(2 * V::BYTES)); |
287 | let d = V::load_aligned(cur.add(3 * V::BYTES)); |
288 | let eqa = self.v1.cmpeq(a); |
289 | let eqb = self.v1.cmpeq(b); |
290 | let eqc = self.v1.cmpeq(c); |
291 | let eqd = self.v1.cmpeq(d); |
292 | let or1 = eqa.or(eqb); |
293 | let or2 = eqc.or(eqd); |
294 | let or3 = or1.or(or2); |
295 | if or3.movemask_will_have_non_zero() { |
296 | let mask = eqd.movemask(); |
297 | if mask.has_non_zero() { |
298 | return Some(cur.add(3 * V::BYTES).add(topos(mask))); |
299 | } |
300 | |
301 | let mask = eqc.movemask(); |
302 | if mask.has_non_zero() { |
303 | return Some(cur.add(2 * V::BYTES).add(topos(mask))); |
304 | } |
305 | |
306 | let mask = eqb.movemask(); |
307 | if mask.has_non_zero() { |
308 | return Some(cur.add(1 * V::BYTES).add(topos(mask))); |
309 | } |
310 | |
311 | let mask = eqa.movemask(); |
312 | debug_assert!(mask.has_non_zero()); |
313 | return Some(cur.add(topos(mask))); |
314 | } |
315 | } |
316 | } |
317 | while cur >= start.add(V::BYTES) { |
318 | debug_assert!(cur.distance(start) >= V::BYTES); |
319 | cur = cur.sub(V::BYTES); |
320 | if let Some(cur) = self.search_chunk(cur, topos) { |
321 | return Some(cur); |
322 | } |
323 | } |
324 | if cur > start { |
325 | debug_assert!(cur.distance(start) < V::BYTES); |
326 | return self.search_chunk(start, topos); |
327 | } |
328 | None |
329 | } |
330 | |
331 | /// Return a count of all matching bytes in the given haystack. |
332 | /// |
333 | /// # Safety |
334 | /// |
335 | /// * It must be the case that `start < end` and that the distance between |
336 | /// them is at least equal to `V::BYTES`. That is, it must always be valid |
337 | /// to do at least an unaligned load of `V` at `start`. |
338 | /// * Both `start` and `end` must be valid for reads. |
339 | /// * Both `start` and `end` must point to an initialized value. |
340 | /// * Both `start` and `end` must point to the same allocated object and |
341 | /// must either be in bounds or at most one byte past the end of the |
342 | /// allocated object. |
343 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
344 | /// object. |
345 | /// * The distance between `start` and `end` must not overflow `isize`. |
346 | /// * The distance being in bounds must not rely on "wrapping around" the |
347 | /// address space. |
#[inline(always)]
349 | pub(crate) unsafe fn count_raw( |
350 | &self, |
351 | start: *const u8, |
352 | end: *const u8, |
353 | ) -> usize { |
debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
355 | |
356 | let confirm = |b| b == self.needle1(); |
357 | let len = end.distance(start); |
358 | debug_assert!( |
359 | len >= V::BYTES, |
"haystack has length {}, but must be at least {}",
361 | len, |
362 | V::BYTES |
363 | ); |
364 | |
365 | // Set `cur` to the first V-aligned pointer greater than `start`. |
366 | let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); |
367 | // Count any matching bytes before we start our aligned loop. |
368 | let mut count = count_byte_by_byte(start, cur, confirm); |
369 | debug_assert!(cur > start && end.sub(V::BYTES) >= start); |
370 | if len >= Self::LOOP_SIZE { |
371 | while cur <= end.sub(Self::LOOP_SIZE) { |
372 | debug_assert_eq!(0, cur.as_usize() % V::BYTES); |
373 | |
374 | let a = V::load_aligned(cur); |
375 | let b = V::load_aligned(cur.add(1 * V::BYTES)); |
376 | let c = V::load_aligned(cur.add(2 * V::BYTES)); |
377 | let d = V::load_aligned(cur.add(3 * V::BYTES)); |
378 | let eqa = self.v1.cmpeq(a); |
379 | let eqb = self.v1.cmpeq(b); |
380 | let eqc = self.v1.cmpeq(c); |
381 | let eqd = self.v1.cmpeq(d); |
382 | count += eqa.movemask().count_ones(); |
383 | count += eqb.movemask().count_ones(); |
384 | count += eqc.movemask().count_ones(); |
385 | count += eqd.movemask().count_ones(); |
386 | cur = cur.add(Self::LOOP_SIZE); |
387 | } |
388 | } |
389 | // Handle any leftovers after the aligned loop above. We use unaligned |
390 | // loads here, but I believe we are guaranteed that they are aligned |
391 | // since `cur` is aligned. |
392 | while cur <= end.sub(V::BYTES) { |
393 | debug_assert!(end.distance(cur) >= V::BYTES); |
394 | let chunk = V::load_unaligned(cur); |
395 | count += self.v1.cmpeq(chunk).movemask().count_ones(); |
396 | cur = cur.add(V::BYTES); |
397 | } |
398 | // And finally count any leftovers that weren't caught above. |
399 | count += count_byte_by_byte(cur, end, confirm); |
400 | count |
401 | } |
402 | |
403 | /// Search `V::BYTES` starting at `cur` via an unaligned load. |
404 | /// |
405 | /// `mask_to_offset` should be a function that converts a `movemask` to |
406 | /// an offset such that `cur.add(offset)` corresponds to a pointer to the |
407 | /// match location if one is found. Generally it is expected to use either |
/// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on whether
409 | /// one is implementing a forward or reverse search, respectively. |
410 | /// |
411 | /// # Safety |
412 | /// |
413 | /// `cur` must be a valid pointer and it must be valid to do an unaligned |
414 | /// load of size `V::BYTES` at `cur`. |
#[inline(always)]
416 | unsafe fn search_chunk( |
417 | &self, |
418 | cur: *const u8, |
419 | mask_to_offset: impl Fn(V::Mask) -> usize, |
420 | ) -> Option<*const u8> { |
421 | let chunk = V::load_unaligned(cur); |
422 | let mask = self.v1.cmpeq(chunk).movemask(); |
423 | if mask.has_non_zero() { |
424 | Some(cur.add(mask_to_offset(mask))) |
425 | } else { |
426 | None |
427 | } |
428 | } |
429 | } |
430 | |
431 | /// Finds all occurrences of two bytes in a haystack. |
432 | /// |
433 | /// That is, this reports matches of one of two possible bytes. For example, |
434 | /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, |
435 | /// `4` and `5`. |
436 | #[derive(Clone, Copy, Debug)] |
437 | pub(crate) struct Two<V> { |
438 | s1: u8, |
439 | s2: u8, |
440 | v1: V, |
441 | v2: V, |
442 | } |
443 | |
444 | impl<V: Vector> Two<V> { |
/// The number of bytes we examine for each iteration of our search loop.
446 | const LOOP_SIZE: usize = 2 * V::BYTES; |
447 | |
/// Create a new searcher that finds occurrences of the two bytes given.
#[inline(always)]
450 | pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two<V> { |
451 | Two { |
452 | s1: needle1, |
453 | s2: needle2, |
454 | v1: V::splat(needle1), |
455 | v2: V::splat(needle2), |
456 | } |
457 | } |
458 | |
459 | /// Returns the first needle given to `Two::new`. |
#[inline(always)]
461 | pub(crate) fn needle1(&self) -> u8 { |
462 | self.s1 |
463 | } |
464 | |
465 | /// Returns the second needle given to `Two::new`. |
#[inline(always)]
467 | pub(crate) fn needle2(&self) -> u8 { |
468 | self.s2 |
469 | } |
470 | |
471 | /// Return a pointer to the first occurrence of one of the needles in the |
472 | /// given haystack. If no such occurrence exists, then `None` is returned. |
473 | /// |
474 | /// When a match is found, the pointer returned is guaranteed to be |
475 | /// `>= start` and `< end`. |
476 | /// |
477 | /// # Safety |
478 | /// |
479 | /// * It must be the case that `start < end` and that the distance between |
480 | /// them is at least equal to `V::BYTES`. That is, it must always be valid |
481 | /// to do at least an unaligned load of `V` at `start`. |
482 | /// * Both `start` and `end` must be valid for reads. |
483 | /// * Both `start` and `end` must point to an initialized value. |
484 | /// * Both `start` and `end` must point to the same allocated object and |
485 | /// must either be in bounds or at most one byte past the end of the |
486 | /// allocated object. |
487 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
488 | /// object. |
489 | /// * The distance between `start` and `end` must not overflow `isize`. |
490 | /// * The distance being in bounds must not rely on "wrapping around" the |
491 | /// address space. |
#[inline(always)]
493 | pub(crate) unsafe fn find_raw( |
494 | &self, |
495 | start: *const u8, |
496 | end: *const u8, |
497 | ) -> Option<*const u8> { |
498 | // If we want to support vectors bigger than 256 bits, we probably |
499 | // need to move up to using a u64 for the masks used below. Currently |
500 | // they are 32 bits, which means we're SOL for vectors that need masks |
501 | // bigger than 32 bits. Overall unclear until there's a use case. |
debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
503 | |
504 | let topos = V::Mask::first_offset; |
505 | let len = end.distance(start); |
506 | debug_assert!( |
507 | len >= V::BYTES, |
"haystack has length {}, but must be at least {}",
509 | len, |
510 | V::BYTES |
511 | ); |
512 | |
513 | // Search a possibly unaligned chunk at `start`. This covers any part |
514 | // of the haystack prior to where aligned loads can start. |
515 | if let Some(cur) = self.search_chunk(start, topos) { |
516 | return Some(cur); |
517 | } |
518 | // Set `cur` to the first V-aligned pointer greater than `start`. |
519 | let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); |
520 | debug_assert!(cur > start && end.sub(V::BYTES) >= start); |
521 | if len >= Self::LOOP_SIZE { |
522 | while cur <= end.sub(Self::LOOP_SIZE) { |
523 | debug_assert_eq!(0, cur.as_usize() % V::BYTES); |
524 | |
525 | let a = V::load_aligned(cur); |
526 | let b = V::load_aligned(cur.add(V::BYTES)); |
527 | let eqa1 = self.v1.cmpeq(a); |
528 | let eqb1 = self.v1.cmpeq(b); |
529 | let eqa2 = self.v2.cmpeq(a); |
530 | let eqb2 = self.v2.cmpeq(b); |
531 | let or1 = eqa1.or(eqb1); |
532 | let or2 = eqa2.or(eqb2); |
533 | let or3 = or1.or(or2); |
534 | if or3.movemask_will_have_non_zero() { |
535 | let mask = eqa1.movemask().or(eqa2.movemask()); |
536 | if mask.has_non_zero() { |
537 | return Some(cur.add(topos(mask))); |
538 | } |
539 | |
540 | let mask = eqb1.movemask().or(eqb2.movemask()); |
541 | debug_assert!(mask.has_non_zero()); |
542 | return Some(cur.add(V::BYTES).add(topos(mask))); |
543 | } |
544 | cur = cur.add(Self::LOOP_SIZE); |
545 | } |
546 | } |
547 | // Handle any leftovers after the aligned loop above. We use unaligned |
548 | // loads here, but I believe we are guaranteed that they are aligned |
549 | // since `cur` is aligned. |
550 | while cur <= end.sub(V::BYTES) { |
551 | debug_assert!(end.distance(cur) >= V::BYTES); |
552 | if let Some(cur) = self.search_chunk(cur, topos) { |
553 | return Some(cur); |
554 | } |
555 | cur = cur.add(V::BYTES); |
556 | } |
557 | // Finally handle any remaining bytes less than the size of V. In this |
558 | // case, our pointer may indeed be unaligned and the load may overlap |
559 | // with the previous one. But that's okay since we know the previous |
560 | // load didn't lead to a match (otherwise we wouldn't be here). |
561 | if cur < end { |
562 | debug_assert!(end.distance(cur) < V::BYTES); |
563 | cur = cur.sub(V::BYTES - end.distance(cur)); |
564 | debug_assert_eq!(end.distance(cur), V::BYTES); |
565 | return self.search_chunk(cur, topos); |
566 | } |
567 | None |
568 | } |
569 | |
/// Return a pointer to the last occurrence of one of the needles in the
/// given haystack. If no such occurrence exists, then `None` is returned.
572 | /// |
573 | /// When a match is found, the pointer returned is guaranteed to be |
574 | /// `>= start` and `< end`. |
575 | /// |
576 | /// # Safety |
577 | /// |
578 | /// * It must be the case that `start < end` and that the distance between |
579 | /// them is at least equal to `V::BYTES`. That is, it must always be valid |
580 | /// to do at least an unaligned load of `V` at `start`. |
581 | /// * Both `start` and `end` must be valid for reads. |
582 | /// * Both `start` and `end` must point to an initialized value. |
583 | /// * Both `start` and `end` must point to the same allocated object and |
584 | /// must either be in bounds or at most one byte past the end of the |
585 | /// allocated object. |
586 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
587 | /// object. |
588 | /// * The distance between `start` and `end` must not overflow `isize`. |
589 | /// * The distance being in bounds must not rely on "wrapping around" the |
590 | /// address space. |
#[inline(always)]
592 | pub(crate) unsafe fn rfind_raw( |
593 | &self, |
594 | start: *const u8, |
595 | end: *const u8, |
596 | ) -> Option<*const u8> { |
597 | // If we want to support vectors bigger than 256 bits, we probably |
598 | // need to move up to using a u64 for the masks used below. Currently |
599 | // they are 32 bits, which means we're SOL for vectors that need masks |
600 | // bigger than 32 bits. Overall unclear until there's a use case. |
debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
602 | |
603 | let topos = V::Mask::last_offset; |
604 | let len = end.distance(start); |
605 | debug_assert!( |
606 | len >= V::BYTES, |
"haystack has length {}, but must be at least {}",
608 | len, |
609 | V::BYTES |
610 | ); |
611 | |
612 | if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { |
613 | return Some(cur); |
614 | } |
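// Set `cur` to the last V-aligned pointer less than or equal to `end`.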
615 | let mut cur = end.sub(end.as_usize() & V::ALIGN); |
616 | debug_assert!(start <= cur && cur <= end); |
617 | if len >= Self::LOOP_SIZE { |
618 | while cur >= start.add(Self::LOOP_SIZE) { |
619 | debug_assert_eq!(0, cur.as_usize() % V::BYTES); |
620 | |
621 | cur = cur.sub(Self::LOOP_SIZE); |
622 | let a = V::load_aligned(cur); |
623 | let b = V::load_aligned(cur.add(V::BYTES)); |
624 | let eqa1 = self.v1.cmpeq(a); |
625 | let eqb1 = self.v1.cmpeq(b); |
626 | let eqa2 = self.v2.cmpeq(a); |
627 | let eqb2 = self.v2.cmpeq(b); |
628 | let or1 = eqa1.or(eqb1); |
629 | let or2 = eqa2.or(eqb2); |
630 | let or3 = or1.or(or2); |
631 | if or3.movemask_will_have_non_zero() { |
632 | let mask = eqb1.movemask().or(eqb2.movemask()); |
633 | if mask.has_non_zero() { |
634 | return Some(cur.add(V::BYTES).add(topos(mask))); |
635 | } |
636 | |
637 | let mask = eqa1.movemask().or(eqa2.movemask()); |
638 | debug_assert!(mask.has_non_zero()); |
639 | return Some(cur.add(topos(mask))); |
640 | } |
641 | } |
642 | } |
643 | while cur >= start.add(V::BYTES) { |
644 | debug_assert!(cur.distance(start) >= V::BYTES); |
645 | cur = cur.sub(V::BYTES); |
646 | if let Some(cur) = self.search_chunk(cur, topos) { |
647 | return Some(cur); |
648 | } |
649 | } |
650 | if cur > start { |
651 | debug_assert!(cur.distance(start) < V::BYTES); |
652 | return self.search_chunk(start, topos); |
653 | } |
654 | None |
655 | } |
656 | |
657 | /// Search `V::BYTES` starting at `cur` via an unaligned load. |
658 | /// |
659 | /// `mask_to_offset` should be a function that converts a `movemask` to |
660 | /// an offset such that `cur.add(offset)` corresponds to a pointer to the |
661 | /// match location if one is found. Generally it is expected to use either |
/// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on whether
663 | /// one is implementing a forward or reverse search, respectively. |
664 | /// |
665 | /// # Safety |
666 | /// |
667 | /// `cur` must be a valid pointer and it must be valid to do an unaligned |
668 | /// load of size `V::BYTES` at `cur`. |
#[inline(always)]
670 | unsafe fn search_chunk( |
671 | &self, |
672 | cur: *const u8, |
673 | mask_to_offset: impl Fn(V::Mask) -> usize, |
674 | ) -> Option<*const u8> { |
675 | let chunk = V::load_unaligned(cur); |
676 | let eq1 = self.v1.cmpeq(chunk); |
677 | let eq2 = self.v2.cmpeq(chunk); |
678 | let mask = eq1.or(eq2).movemask(); |
679 | if mask.has_non_zero() { |
680 | let mask1 = eq1.movemask(); |
681 | let mask2 = eq2.movemask(); |
682 | Some(cur.add(mask_to_offset(mask1.or(mask2)))) |
683 | } else { |
684 | None |
685 | } |
686 | } |
687 | } |
688 | |
/// Finds all occurrences of three bytes in a haystack.
///
/// That is, this reports matches of one of three possible bytes. For
/// example, searching for `a`, `b` or `o` in `afoobar` would report matches
/// at offsets `0`, `2`, `3`, `4` and `5`.
694 | #[derive(Clone, Copy, Debug)] |
695 | pub(crate) struct Three<V> { |
696 | s1: u8, |
697 | s2: u8, |
698 | s3: u8, |
699 | v1: V, |
700 | v2: V, |
701 | v3: V, |
702 | } |
703 | |
704 | impl<V: Vector> Three<V> { |
/// The number of bytes we examine for each iteration of our search loop.
706 | const LOOP_SIZE: usize = 2 * V::BYTES; |
707 | |
/// Create a new searcher that finds occurrences of the three bytes given.
#[inline(always)]
710 | pub(crate) unsafe fn new( |
711 | needle1: u8, |
712 | needle2: u8, |
713 | needle3: u8, |
714 | ) -> Three<V> { |
715 | Three { |
716 | s1: needle1, |
717 | s2: needle2, |
718 | s3: needle3, |
719 | v1: V::splat(needle1), |
720 | v2: V::splat(needle2), |
721 | v3: V::splat(needle3), |
722 | } |
723 | } |
724 | |
725 | /// Returns the first needle given to `Three::new`. |
#[inline(always)]
727 | pub(crate) fn needle1(&self) -> u8 { |
728 | self.s1 |
729 | } |
730 | |
731 | /// Returns the second needle given to `Three::new`. |
#[inline(always)]
733 | pub(crate) fn needle2(&self) -> u8 { |
734 | self.s2 |
735 | } |
736 | |
737 | /// Returns the third needle given to `Three::new`. |
#[inline(always)]
739 | pub(crate) fn needle3(&self) -> u8 { |
740 | self.s3 |
741 | } |
742 | |
743 | /// Return a pointer to the first occurrence of one of the needles in the |
744 | /// given haystack. If no such occurrence exists, then `None` is returned. |
745 | /// |
746 | /// When a match is found, the pointer returned is guaranteed to be |
747 | /// `>= start` and `< end`. |
748 | /// |
749 | /// # Safety |
750 | /// |
751 | /// * It must be the case that `start < end` and that the distance between |
752 | /// them is at least equal to `V::BYTES`. That is, it must always be valid |
753 | /// to do at least an unaligned load of `V` at `start`. |
754 | /// * Both `start` and `end` must be valid for reads. |
755 | /// * Both `start` and `end` must point to an initialized value. |
756 | /// * Both `start` and `end` must point to the same allocated object and |
757 | /// must either be in bounds or at most one byte past the end of the |
758 | /// allocated object. |
759 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
760 | /// object. |
761 | /// * The distance between `start` and `end` must not overflow `isize`. |
762 | /// * The distance being in bounds must not rely on "wrapping around" the |
763 | /// address space. |
#[inline(always)]
765 | pub(crate) unsafe fn find_raw( |
766 | &self, |
767 | start: *const u8, |
768 | end: *const u8, |
769 | ) -> Option<*const u8> { |
770 | // If we want to support vectors bigger than 256 bits, we probably |
771 | // need to move up to using a u64 for the masks used below. Currently |
772 | // they are 32 bits, which means we're SOL for vectors that need masks |
773 | // bigger than 32 bits. Overall unclear until there's a use case. |
debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
775 | |
776 | let topos = V::Mask::first_offset; |
777 | let len = end.distance(start); |
778 | debug_assert!( |
779 | len >= V::BYTES, |
"haystack has length {}, but must be at least {}",
781 | len, |
782 | V::BYTES |
783 | ); |
784 | |
785 | // Search a possibly unaligned chunk at `start`. This covers any part |
786 | // of the haystack prior to where aligned loads can start. |
787 | if let Some(cur) = self.search_chunk(start, topos) { |
788 | return Some(cur); |
789 | } |
790 | // Set `cur` to the first V-aligned pointer greater than `start`. |
791 | let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); |
792 | debug_assert!(cur > start && end.sub(V::BYTES) >= start); |
793 | if len >= Self::LOOP_SIZE { |
794 | while cur <= end.sub(Self::LOOP_SIZE) { |
795 | debug_assert_eq!(0, cur.as_usize() % V::BYTES); |
796 | |
797 | let a = V::load_aligned(cur); |
798 | let b = V::load_aligned(cur.add(V::BYTES)); |
799 | let eqa1 = self.v1.cmpeq(a); |
800 | let eqb1 = self.v1.cmpeq(b); |
801 | let eqa2 = self.v2.cmpeq(a); |
802 | let eqb2 = self.v2.cmpeq(b); |
803 | let eqa3 = self.v3.cmpeq(a); |
804 | let eqb3 = self.v3.cmpeq(b); |
805 | let or1 = eqa1.or(eqb1); |
806 | let or2 = eqa2.or(eqb2); |
807 | let or3 = eqa3.or(eqb3); |
808 | let or4 = or1.or(or2); |
809 | let or5 = or3.or(or4); |
810 | if or5.movemask_will_have_non_zero() { |
811 | let mask = eqa1 |
812 | .movemask() |
813 | .or(eqa2.movemask()) |
814 | .or(eqa3.movemask()); |
815 | if mask.has_non_zero() { |
816 | return Some(cur.add(topos(mask))); |
817 | } |
818 | |
819 | let mask = eqb1 |
820 | .movemask() |
821 | .or(eqb2.movemask()) |
822 | .or(eqb3.movemask()); |
823 | debug_assert!(mask.has_non_zero()); |
824 | return Some(cur.add(V::BYTES).add(topos(mask))); |
825 | } |
826 | cur = cur.add(Self::LOOP_SIZE); |
827 | } |
828 | } |
829 | // Handle any leftovers after the aligned loop above. We use unaligned |
830 | // loads here, but I believe we are guaranteed that they are aligned |
831 | // since `cur` is aligned. |
832 | while cur <= end.sub(V::BYTES) { |
833 | debug_assert!(end.distance(cur) >= V::BYTES); |
834 | if let Some(cur) = self.search_chunk(cur, topos) { |
835 | return Some(cur); |
836 | } |
837 | cur = cur.add(V::BYTES); |
838 | } |
839 | // Finally handle any remaining bytes less than the size of V. In this |
840 | // case, our pointer may indeed be unaligned and the load may overlap |
841 | // with the previous one. But that's okay since we know the previous |
842 | // load didn't lead to a match (otherwise we wouldn't be here). |
843 | if cur < end { |
844 | debug_assert!(end.distance(cur) < V::BYTES); |
845 | cur = cur.sub(V::BYTES - end.distance(cur)); |
846 | debug_assert_eq!(end.distance(cur), V::BYTES); |
847 | return self.search_chunk(cur, topos); |
848 | } |
849 | None |
850 | } |
851 | |
/// Return a pointer to the last occurrence of one of the needles in the
/// given haystack. If no such occurrence exists, then `None` is returned.
854 | /// |
855 | /// When a match is found, the pointer returned is guaranteed to be |
856 | /// `>= start` and `< end`. |
857 | /// |
858 | /// # Safety |
859 | /// |
860 | /// * It must be the case that `start < end` and that the distance between |
861 | /// them is at least equal to `V::BYTES`. That is, it must always be valid |
862 | /// to do at least an unaligned load of `V` at `start`. |
863 | /// * Both `start` and `end` must be valid for reads. |
864 | /// * Both `start` and `end` must point to an initialized value. |
865 | /// * Both `start` and `end` must point to the same allocated object and |
866 | /// must either be in bounds or at most one byte past the end of the |
867 | /// allocated object. |
868 | /// * Both `start` and `end` must be _derived from_ a pointer to the same |
869 | /// object. |
870 | /// * The distance between `start` and `end` must not overflow `isize`. |
871 | /// * The distance being in bounds must not rely on "wrapping around" the |
872 | /// address space. |
#[inline(always)]
874 | pub(crate) unsafe fn rfind_raw( |
875 | &self, |
876 | start: *const u8, |
877 | end: *const u8, |
878 | ) -> Option<*const u8> { |
879 | // If we want to support vectors bigger than 256 bits, we probably |
880 | // need to move up to using a u64 for the masks used below. Currently |
881 | // they are 32 bits, which means we're SOL for vectors that need masks |
882 | // bigger than 32 bits. Overall unclear until there's a use case. |
debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
884 | |
885 | let topos = V::Mask::last_offset; |
886 | let len = end.distance(start); |
887 | debug_assert!( |
888 | len >= V::BYTES, |
"haystack has length {}, but must be at least {}",
890 | len, |
891 | V::BYTES |
892 | ); |
893 | |
894 | if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { |
895 | return Some(cur); |
896 | } |
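// Set `cur` to the last V-aligned pointer less than or equal to `end`.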
897 | let mut cur = end.sub(end.as_usize() & V::ALIGN); |
898 | debug_assert!(start <= cur && cur <= end); |
899 | if len >= Self::LOOP_SIZE { |
900 | while cur >= start.add(Self::LOOP_SIZE) { |
901 | debug_assert_eq!(0, cur.as_usize() % V::BYTES); |
902 | |
903 | cur = cur.sub(Self::LOOP_SIZE); |
904 | let a = V::load_aligned(cur); |
905 | let b = V::load_aligned(cur.add(V::BYTES)); |
906 | let eqa1 = self.v1.cmpeq(a); |
907 | let eqb1 = self.v1.cmpeq(b); |
908 | let eqa2 = self.v2.cmpeq(a); |
909 | let eqb2 = self.v2.cmpeq(b); |
910 | let eqa3 = self.v3.cmpeq(a); |
911 | let eqb3 = self.v3.cmpeq(b); |
912 | let or1 = eqa1.or(eqb1); |
913 | let or2 = eqa2.or(eqb2); |
914 | let or3 = eqa3.or(eqb3); |
915 | let or4 = or1.or(or2); |
916 | let or5 = or3.or(or4); |
917 | if or5.movemask_will_have_non_zero() { |
918 | let mask = eqb1 |
919 | .movemask() |
920 | .or(eqb2.movemask()) |
921 | .or(eqb3.movemask()); |
922 | if mask.has_non_zero() { |
923 | return Some(cur.add(V::BYTES).add(topos(mask))); |
924 | } |
925 | |
926 | let mask = eqa1 |
927 | .movemask() |
928 | .or(eqa2.movemask()) |
929 | .or(eqa3.movemask()); |
930 | debug_assert!(mask.has_non_zero()); |
931 | return Some(cur.add(topos(mask))); |
932 | } |
933 | } |
934 | } |
935 | while cur >= start.add(V::BYTES) { |
936 | debug_assert!(cur.distance(start) >= V::BYTES); |
937 | cur = cur.sub(V::BYTES); |
938 | if let Some(cur) = self.search_chunk(cur, topos) { |
939 | return Some(cur); |
940 | } |
941 | } |
942 | if cur > start { |
943 | debug_assert!(cur.distance(start) < V::BYTES); |
944 | return self.search_chunk(start, topos); |
945 | } |
946 | None |
947 | } |
948 | |
949 | /// Search `V::BYTES` starting at `cur` via an unaligned load. |
950 | /// |
951 | /// `mask_to_offset` should be a function that converts a `movemask` to |
952 | /// an offset such that `cur.add(offset)` corresponds to a pointer to the |
953 | /// match location if one is found. Generally it is expected to use either |
/// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on whether
955 | /// one is implementing a forward or reverse search, respectively. |
956 | /// |
957 | /// # Safety |
958 | /// |
959 | /// `cur` must be a valid pointer and it must be valid to do an unaligned |
960 | /// load of size `V::BYTES` at `cur`. |
#[inline(always)]
962 | unsafe fn search_chunk( |
963 | &self, |
964 | cur: *const u8, |
965 | mask_to_offset: impl Fn(V::Mask) -> usize, |
966 | ) -> Option<*const u8> { |
967 | let chunk = V::load_unaligned(cur); |
968 | let eq1 = self.v1.cmpeq(chunk); |
969 | let eq2 = self.v2.cmpeq(chunk); |
970 | let eq3 = self.v3.cmpeq(chunk); |
971 | let mask = eq1.or(eq2).or(eq3).movemask(); |
972 | if mask.has_non_zero() { |
973 | let mask1 = eq1.movemask(); |
974 | let mask2 = eq2.movemask(); |
975 | let mask3 = eq3.movemask(); |
976 | Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3)))) |
977 | } else { |
978 | None |
979 | } |
980 | } |
981 | } |
982 | |
983 | /// An iterator over all occurrences of a set of bytes in a haystack. |
984 | /// |
985 | /// This iterator implements the routines necessary to provide a |
986 | /// `DoubleEndedIterator` impl, which means it can also be used to find |
987 | /// occurrences in reverse order. |
988 | /// |
/// The lifetime parameter is as follows:
990 | /// |
991 | /// * `'h` refers to the lifetime of the haystack being searched. |
992 | /// |
993 | /// This type is intended to be used to implement all iterators for the |
994 | /// `memchr` family of functions. It handles a tiny bit of marginally tricky |
995 | /// raw pointer math, but otherwise expects the caller to provide `find_raw` |
996 | /// and `rfind_raw` routines for each call of `next` and `next_back`, |
997 | /// respectively. |
998 | #[derive(Clone, Debug)] |
999 | pub(crate) struct Iter<'h> { |
1000 | /// The original starting point into the haystack. We use this to convert |
1001 | /// pointers to offsets. |
1002 | original_start: *const u8, |
1003 | /// The current starting point into the haystack. That is, where the next |
1004 | /// search will begin. |
1005 | start: *const u8, |
1006 | /// The current ending point into the haystack. That is, where the next |
1007 | /// reverse search will begin. |
1008 | end: *const u8, |
/// A marker for tracking the lifetime of the original_start/start/end
/// pointers above, which all point into the haystack.
1011 | haystack: core::marker::PhantomData<&'h [u8]>, |
1012 | } |
1013 | |
1014 | // SAFETY: Iter contains no shared references to anything that performs any |
1015 | // interior mutations. Also, the lifetime guarantees that Iter will not outlive |
1016 | // the haystack. |
1017 | unsafe impl<'h> Send for Iter<'h> {} |
1018 | |
// SAFETY: Iter performs no interior mutations, therefore no explicit
1020 | // synchronization is necessary. Also, the lifetime guarantees that Iter will |
1021 | // not outlive the haystack. |
1022 | unsafe impl<'h> Sync for Iter<'h> {} |
1023 | |
1024 | impl<'h> Iter<'h> { |
1025 | /// Create a new generic memchr iterator. |
#[inline(always)]
1027 | pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> { |
1028 | Iter { |
1029 | original_start: haystack.as_ptr(), |
1030 | start: haystack.as_ptr(), |
1031 | end: haystack.as_ptr().wrapping_add(haystack.len()), |
1032 | haystack: core::marker::PhantomData, |
1033 | } |
1034 | } |
1035 | |
1036 | /// Returns the next occurrence in the forward direction. |
1037 | /// |
1038 | /// # Safety |
1039 | /// |
1040 | /// Callers must ensure that if a pointer is returned from the closure |
1041 | /// provided, then it must be greater than or equal to the start pointer |
1042 | /// and less than the end pointer. |
#[inline(always)]
1044 | pub(crate) unsafe fn next( |
1045 | &mut self, |
1046 | mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, |
1047 | ) -> Option<usize> { |
1048 | // SAFETY: Pointers are derived directly from the same &[u8] haystack. |
1049 | // We only ever modify start/end corresponding to a matching offset |
1050 | // found between start and end. Thus all changes to start/end maintain |
1051 | // our safety requirements. |
1052 | // |
1053 | // The only other assumption we rely on is that the pointer returned |
1054 | // by `find_raw` satisfies `self.start <= found < self.end`, and that |
1055 | // safety contract is forwarded to the caller. |
1056 | let found = find_raw(self.start, self.end)?; |
1057 | let result = found.distance(self.original_start); |
1058 | self.start = found.add(1); |
1059 | Some(result) |
1060 | } |
1061 | |
1062 | /// Returns the number of remaining elements in this iterator. |
#[inline(always)]
1064 | pub(crate) fn count( |
1065 | self, |
1066 | mut count_raw: impl FnMut(*const u8, *const u8) -> usize, |
1067 | ) -> usize { |
1068 | // SAFETY: Pointers are derived directly from the same &[u8] haystack. |
1069 | // We only ever modify start/end corresponding to a matching offset |
1070 | // found between start and end. Thus all changes to start/end maintain |
1071 | // our safety requirements. |
1072 | count_raw(self.start, self.end) |
1073 | } |
1074 | |
1075 | /// Returns the next occurrence in reverse. |
1076 | /// |
1077 | /// # Safety |
1078 | /// |
1079 | /// Callers must ensure that if a pointer is returned from the closure |
1080 | /// provided, then it must be greater than or equal to the start pointer |
1081 | /// and less than the end pointer. |
#[inline(always)]
1083 | pub(crate) unsafe fn next_back( |
1084 | &mut self, |
1085 | mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, |
1086 | ) -> Option<usize> { |
1087 | // SAFETY: Pointers are derived directly from the same &[u8] haystack. |
1088 | // We only ever modify start/end corresponding to a matching offset |
1089 | // found between start and end. Thus all changes to start/end maintain |
1090 | // our safety requirements. |
1091 | // |
1092 | // The only other assumption we rely on is that the pointer returned |
1093 | // by `rfind_raw` satisfies `self.start <= found < self.end`, and that |
1094 | // safety contract is forwarded to the caller. |
1095 | let found = rfind_raw(self.start, self.end)?; |
1096 | let result = found.distance(self.original_start); |
1097 | self.end = found; |
1098 | Some(result) |
1099 | } |
1100 | |
1101 | /// Provides an implementation of `Iterator::size_hint`. |
#[inline(always)]
1103 | pub(crate) fn size_hint(&self) -> (usize, Option<usize>) { |
1104 | (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize()))) |
1105 | } |
1106 | } |
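
// As a rough usage sketch only (`SomeVector` is a placeholder for a concrete
// `Vector` implementation, and real callers must also handle haystacks
// shorter than `V::BYTES` before invoking the raw searchers), a forward
// iterator built from `One` and `Iter` looks something like this:
//
//     // `SomeVector` is hypothetical; any `Vector` impl would do.
//     let searcher = unsafe { One::<SomeVector>::new(b'\n') };
//     let mut it = Iter::new(haystack);
//     // SAFETY: `One::find_raw` only returns pointers in `[start, end)`,
//     // which is exactly what `Iter::next` requires.
//     while let Some(offset) =
//         unsafe { it.next(|s, e| searcher.find_raw(s, e)) }
//     {
//         // `offset` is an index into `haystack`.
//     }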
1107 | |
1108 | /// Search a slice using a function that operates on raw pointers. |
1109 | /// |
1110 | /// Given a function to search a contiguous sequence of memory for the location |
1111 | /// of a non-empty set of bytes, this will execute that search on a slice of |
1112 | /// bytes. The pointer returned by the given function will be converted to an |
1113 | /// offset relative to the starting point of the given slice. That is, if a |
1114 | /// match is found, the offset returned by this routine is guaranteed to be a |
1115 | /// valid index into `haystack`. |
1116 | /// |
1117 | /// Callers may use this for a forward or reverse search. |
1118 | /// |
1119 | /// # Safety |
1120 | /// |
1121 | /// Callers must ensure that if a pointer is returned by `find_raw`, then the |
1122 | /// pointer must be greater than or equal to the starting pointer and less than |
1123 | /// the end pointer. |
#[inline(always)]
1125 | pub(crate) unsafe fn search_slice_with_raw( |
1126 | haystack: &[u8], |
1127 | mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, |
1128 | ) -> Option<usize> { |
1129 | // SAFETY: We rely on `find_raw` to return a correct and valid pointer, but |
1130 | // otherwise, `start` and `end` are valid due to the guarantees provided by |
1131 | // a &[u8]. |
1132 | let start = haystack.as_ptr(); |
1133 | let end = start.add(haystack.len()); |
1134 | let found = find_raw(start, end)?; |
1135 | Some(found.distance(start)) |
1136 | } |
1137 | |
/// Performs a forward byte-at-a-time loop, examining bytes from `start`
/// (inclusive) up to `end` (exclusive), until `confirm` returns `true` for a
/// byte. If no such byte exists, then `None` is returned. Otherwise, the
/// pointer to the matching byte closest to `start` is returned.
///
/// # Safety
///
/// Callers must provide valid pointers satisfying `start <= end`, and every
/// byte in `start..end` must be valid for reads.
#[inline(always)]
1148 | pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>( |
1149 | start: *const u8, |
1150 | end: *const u8, |
1151 | confirm: F, |
1152 | ) -> Option<*const u8> { |
1153 | debug_assert!(start <= end); |
1154 | let mut ptr = start; |
1155 | while ptr < end { |
1156 | if confirm(*ptr) { |
1157 | return Some(ptr); |
1158 | } |
1159 | ptr = ptr.offset(1); |
1160 | } |
1161 | None |
1162 | } |
1163 | |
/// Performs a reverse byte-at-a-time loop, examining bytes from `end`
/// (exclusive) down to `start` (inclusive), until `confirm` returns `true`
/// for a byte. If no such byte exists, then `None` is returned. Otherwise,
/// the pointer to the matching byte closest to `end` is returned.
///
/// # Safety
///
/// Callers must provide valid pointers satisfying `start <= end`, and every
/// byte in `start..end` must be valid for reads.
#[inline(always)]
1174 | pub(crate) unsafe fn rev_byte_by_byte<F: Fn(u8) -> bool>( |
1175 | start: *const u8, |
1176 | end: *const u8, |
1177 | confirm: F, |
1178 | ) -> Option<*const u8> { |
1179 | debug_assert!(start <= end); |
1180 | |
1181 | let mut ptr = end; |
1182 | while ptr > start { |
1183 | ptr = ptr.offset(-1); |
1184 | if confirm(*ptr) { |
1185 | return Some(ptr); |
1186 | } |
1187 | } |
1188 | None |
1189 | } |
1190 | |
/// Performs a forward byte-at-a-time loop over `start..end` and returns the
/// number of bytes for which `confirm` returns `true`.
///
/// # Safety
///
/// Callers must provide valid pointers satisfying `start <= end`, and every
/// byte in `start..end` must be valid for reads.
#[inline(always)]
1199 | pub(crate) unsafe fn count_byte_by_byte<F: Fn(u8) -> bool>( |
1200 | start: *const u8, |
1201 | end: *const u8, |
1202 | confirm: F, |
1203 | ) -> usize { |
1204 | debug_assert!(start <= end); |
1205 | let mut ptr = start; |
1206 | let mut count = 0; |
1207 | while ptr < end { |
1208 | if confirm(*ptr) { |
1209 | count += 1; |
1210 | } |
1211 | ptr = ptr.offset(1); |
1212 | } |
1213 | count |
1214 | } |
1215 | |