/*!
Generic crate-internal routines for the `memchr` family of functions.
*/

// What follows is a vector algorithm generic over the specific vector
// type to detect the position of one, two or three needles in a haystack.
// From what I know, this is a "classic" algorithm, although I don't
// believe it has been published in any peer reviewed journal. I believe
// it can be found in places like glibc and Go's standard library. It
// appears to be well known and is elaborated on in more detail here:
// https://gms.tf/stdfind-and-memchr-optimizations.html
//
// While the routine below is fairly long and perhaps intimidating, the basic
// idea is actually very simple and can be expressed straight-forwardly in
// pseudo code. The pseudo code below is written for 128 bit vectors, but the
// actual code below works for anything that implements the Vector trait.
//
// needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
// // Note: shift amount is in bytes
//
// i = 0
// while i <= haystack.len() - 16:
//   // A 16 byte vector. Each byte in chunk corresponds to a byte in
//   // the haystack.
//   chunk = haystack[i:i+16]
//   // Compare bytes in needle with bytes in chunk. The result is a 16
//   // byte chunk where each byte is 0xFF if the corresponding bytes
//   // in needle and chunk were equal, or 0x00 otherwise.
//   eqs = cmpeq(needle, chunk)
//   // Return a 32 bit integer where the most significant 16 bits
//   // are always 0 and the lower 16 bits correspond to whether the
//   // most significant bit in the corresponding byte in `eqs` is set.
//   // In other words, `mask as u16` has bit i set if and only if
//   // needle[i] == chunk[i].
//   mask = movemask(eqs)
//
//   // Mask is 0 if there is no match, and non-zero otherwise.
//   if mask != 0:
//     // trailing_zeros tells us the position of the least significant
//     // bit that is set.
//     return i + trailing_zeros(mask)
//   i += 16
//
// // haystack length may not be a multiple of 16, so search the rest.
// while i < haystack.len():
//   if haystack[i] == n1:
//     return i
//   i += 1
//
// // No match found.
// return NULL
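//
// As a concrete (illustrative) example of the pseudo code above: searching
// for n1 = b'z' in the 16 byte chunk b"abzz............" yields
//
//   eqs  = [0x00, 0x00, 0xFF, 0xFF, 0x00, ..., 0x00]
//   mask = movemask(eqs) = 0b0000_0000_0000_1100
//
// so trailing_zeros(mask) = 2 and the match is reported at offset i + 2.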
//
// In fact, we could loosely translate the above code to Rust line-for-line
// and it would be a pretty fast algorithm. But, we pull out all the stops
// to go as fast as possible:
//
// 1. We use aligned loads. That is, we do some finagling to make sure our
//    primary loop not only proceeds in increments of 16 bytes, but that
//    the address of haystack's pointer that we dereference is aligned to
//    16 bytes. 16 is a magic number here because it is the size of an SSE2
//    128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
//    Therefore, to get aligned loads, our pointer's address must be evenly
//    divisible by 16.
// 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
//    kind of like loop unrolling, but we combine the equality comparisons
//    using a vector OR such that we only need to extract a single mask to
//    determine whether a match exists or not. If so, then we do some
//    book-keeping to determine the precise location, but otherwise move on.
// 3. We use our "chunk" comparison routine in as many places as possible,
//    even if it means using unaligned loads. In particular, if haystack
//    starts with an unaligned address, then we do an unaligned load to
//    search the first 16 bytes. We then start our primary loop at the
//    smallest subsequent aligned address, which will actually overlap with
//    previously searched bytes. But we're OK with that. We do a similar
//    dance at the end of our primary loop. Finally, to avoid a
//    byte-at-a-time loop at the end, we do a final 16 byte unaligned load
//    that may overlap with a previous load. This is OK because it converts
//    a loop into a small number of very fast vector instructions, and the
//    overlap is OK because we know the place where the overlap occurs does
//    not contain a match.
//
// And that's pretty much all there is to it. Note that since the code below
// is generic and since it's meant to be inlined into routines with a
// `#[target_feature(enable = "...")]` annotation, we must mark all routines
// as both unsafe and `#[inline(always)]`.
//
// The fact that the code below is generic does somewhat inhibit us. For
// example, I've noticed that introducing a non-inlineable `#[cold]` function
// to handle the match case in the loop generates tighter assembly, but there
// is no way to do this in the generic code below because the generic code
// doesn't know what `target_feature` annotation to apply to the
// non-inlineable function. We could make such functions part of the `Vector`
// trait, but we instead live with the slightly sub-optimal codegen for now
// since it doesn't seem to have a noticeable perf difference.

use crate::{
    ext::Pointer,
    vector::{MoveMask, Vector},
};

/// Finds all occurrences of a single byte in a haystack.
#[derive(Clone, Copy, Debug)]
pub(crate) struct One<V> {
    s1: u8,
    v1: V,
}

impl<V: Vector> One<V> {
    /// The number of bytes we examine per each iteration of our search loop.
    const LOOP_SIZE: usize = 4 * V::BYTES;
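    // For example (illustrative): with 128-bit vectors this is 64 bytes per
    // iteration, and with 256-bit (AVX2) vectors it is 128 bytes.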

    /// Create a new searcher that finds occurrences of the byte given.
    #[inline(always)]
    pub(crate) unsafe fn new(needle: u8) -> One<V> {
        One { s1: needle, v1: V::splat(needle) }
    }

    /// Returns the needle given to `One::new`.
    #[inline(always)]
    pub(crate) fn needle1(&self) -> u8 {
        self.s1
    }

    /// Return a pointer to the first occurrence of the needle in the given
    /// haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
    /// to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
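    ///
    /// # Example (illustrative)
    ///
    /// A minimal sketch of intended use, assuming some concrete `V: Vector`
    /// is in scope (e.g. one of this crate's SIMD wrapper types), that the
    /// required target features are enabled, and that the haystack is at
    /// least `V::BYTES` long:
    ///
    /// ```ignore
    /// let one = unsafe { One::<V>::new(b'x') };
    /// let mut hay = [b'y'; 64];
    /// hay[40] = b'x';
    /// let start = hay.as_ptr();
    /// // SAFETY: `start` and `end` are derived from the same slice, and
    /// // `hay.len() >= V::BYTES`.
    /// let found = unsafe { one.find_raw(start, start.add(hay.len())) };
    /// assert_eq!(found, Some(start.wrapping_add(40)));
    /// ```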
    #[inline(always)]
    pub(crate) unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::first_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        // Search a possibly unaligned chunk at `start`. This covers any part
        // of the haystack prior to where aligned loads can start.
        if let Some(cur) = self.search_chunk(start, topos) {
            return Some(cur);
        }
        // Set `cur` to the first V-aligned pointer greater than `start`.
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
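        // For example (illustrative): if `start` is 0x1003 and `V::BYTES` is
        // 16 (so `V::ALIGN` is 15), then `start & V::ALIGN` is 3 and `cur`
        // becomes 0x1003 + (16 - 3) = 0x1010. If `start` is already aligned,
        // `cur` is `start + V::BYTES`, the next boundary strictly after it.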
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
        if len >= Self::LOOP_SIZE {
            while cur <= end.sub(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(1 * V::BYTES));
                let c = V::load_aligned(cur.add(2 * V::BYTES));
                let d = V::load_aligned(cur.add(3 * V::BYTES));
                let eqa = self.v1.cmpeq(a);
                let eqb = self.v1.cmpeq(b);
                let eqc = self.v1.cmpeq(c);
                let eqd = self.v1.cmpeq(d);
                let or1 = eqa.or(eqb);
                let or2 = eqc.or(eqd);
                let or3 = or1.or(or2);
                if or3.movemask_will_have_non_zero() {
                    let mask = eqa.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(topos(mask)));
                    }

                    let mask = eqb.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(1 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqc.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(2 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqd.movemask();
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(3 * V::BYTES).add(topos(mask)));
                }
                cur = cur.add(Self::LOOP_SIZE);
            }
        }
        // Handle any leftovers after the aligned loop above. We use unaligned
        // loads here, but I believe we are guaranteed that they are aligned
        // since `cur` is aligned.
        while cur <= end.sub(V::BYTES) {
            debug_assert!(end.distance(cur) >= V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
            cur = cur.add(V::BYTES);
        }
        // Finally handle any remaining bytes less than the size of V. In this
        // case, our pointer may indeed be unaligned and the load may overlap
        // with the previous one. But that's okay since we know the previous
        // load didn't lead to a match (otherwise we wouldn't be here).
        if cur < end {
            debug_assert!(end.distance(cur) < V::BYTES);
            cur = cur.sub(V::BYTES - end.distance(cur));
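            // For example (illustrative): with `V::BYTES == 16`, if 5 bytes
            // remain, we back `cur` up by 11 bytes so the final load covers
            // exactly the last 16 bytes of the haystack.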
            debug_assert_eq!(end.distance(cur), V::BYTES);
            return self.search_chunk(cur, topos);
        }
        None
    }

    /// Return a pointer to the last occurrence of the needle in the given
    /// haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
    /// to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    #[inline(always)]
    pub(crate) unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::last_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
            return Some(cur);
        }
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
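        // This rounds `end` down to the greatest V-aligned pointer `<= end`.
        // For example (illustrative): with `V::BYTES == 16`, an `end` of
        // 0x100B has `end & V::ALIGN == 11`, so `cur` becomes 0x1000.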
        debug_assert!(start <= cur && cur <= end);
        if len >= Self::LOOP_SIZE {
            while cur >= start.add(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                cur = cur.sub(Self::LOOP_SIZE);
                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(1 * V::BYTES));
                let c = V::load_aligned(cur.add(2 * V::BYTES));
                let d = V::load_aligned(cur.add(3 * V::BYTES));
                let eqa = self.v1.cmpeq(a);
                let eqb = self.v1.cmpeq(b);
                let eqc = self.v1.cmpeq(c);
                let eqd = self.v1.cmpeq(d);
                let or1 = eqa.or(eqb);
                let or2 = eqc.or(eqd);
                let or3 = or1.or(or2);
                if or3.movemask_will_have_non_zero() {
                    let mask = eqd.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(3 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqc.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(2 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqb.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(1 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqa.movemask();
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(topos(mask)));
                }
            }
        }
        while cur >= start.add(V::BYTES) {
            debug_assert!(cur.distance(start) >= V::BYTES);
            cur = cur.sub(V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
        }
        if cur > start {
            debug_assert!(cur.distance(start) < V::BYTES);
            return self.search_chunk(start, topos);
        }
        None
    }

    /// Return a count of all matching bytes in the given haystack.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
    /// to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    #[inline(always)]
    pub(crate) unsafe fn count_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> usize {
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let confirm = |b| b == self.needle1();
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        // Set `cur` to the first V-aligned pointer greater than `start`.
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
        // Count any matching bytes before we start our aligned loop.
        let mut count = count_byte_by_byte(start, cur, confirm);
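        // (Unlike `find_raw`, an overlapping unaligned load would not work
        // for this prefix, since any bytes covered twice would be counted
        // twice.)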
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
        if len >= Self::LOOP_SIZE {
            while cur <= end.sub(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(1 * V::BYTES));
                let c = V::load_aligned(cur.add(2 * V::BYTES));
                let d = V::load_aligned(cur.add(3 * V::BYTES));
                let eqa = self.v1.cmpeq(a);
                let eqb = self.v1.cmpeq(b);
                let eqc = self.v1.cmpeq(c);
                let eqd = self.v1.cmpeq(d);
                count += eqa.movemask().count_ones();
                count += eqb.movemask().count_ones();
                count += eqc.movemask().count_ones();
                count += eqd.movemask().count_ones();
                cur = cur.add(Self::LOOP_SIZE);
            }
        }
        // Handle any leftovers after the aligned loop above. We use unaligned
        // loads here, but I believe we are guaranteed that they are aligned
        // since `cur` is aligned.
        while cur <= end.sub(V::BYTES) {
            debug_assert!(end.distance(cur) >= V::BYTES);
            let chunk = V::load_unaligned(cur);
            count += self.v1.cmpeq(chunk).movemask().count_ones();
            cur = cur.add(V::BYTES);
        }
        // And finally count any leftovers that weren't caught above.
        count += count_byte_by_byte(cur, end, confirm);
        count
    }

    /// Search `V::BYTES` starting at `cur` via an unaligned load.
    ///
    /// `mask_to_offset` should be a function that converts a `movemask` to
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
    /// match location if one is found. Generally it is expected to be either
    /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on
    /// whether one is implementing a forward or reverse search, respectively.
    ///
    /// # Safety
    ///
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
    /// load of size `V::BYTES` at `cur`.
    #[inline(always)]
    unsafe fn search_chunk(
        &self,
        cur: *const u8,
        mask_to_offset: impl Fn(V::Mask) -> usize,
    ) -> Option<*const u8> {
        let chunk = V::load_unaligned(cur);
        let mask = self.v1.cmpeq(chunk).movemask();
        if mask.has_non_zero() {
            Some(cur.add(mask_to_offset(mask)))
        } else {
            None
        }
    }
}

/// Finds all occurrences of two bytes in a haystack.
///
/// That is, this reports matches of one of two possible bytes. For example,
/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
/// `4` and `5`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Two<V> {
    s1: u8,
    s2: u8,
    v1: V,
    v2: V,
}

impl<V: Vector> Two<V> {
    /// The number of bytes we examine per each iteration of our search loop.
    const LOOP_SIZE: usize = 2 * V::BYTES;
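    // Note: with two needles, each iteration of this 2 * V::BYTES loop
    // already issues four `cmpeq`s (two per chunk), the same number of
    // comparisons per iteration as `One`'s 4 * V::BYTES loop.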

    /// Create a new searcher that finds occurrences of the bytes given.
    #[inline(always)]
    pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two<V> {
        Two {
            s1: needle1,
            s2: needle2,
            v1: V::splat(needle1),
            v2: V::splat(needle2),
        }
    }

    /// Returns the first needle given to `Two::new`.
    #[inline(always)]
    pub(crate) fn needle1(&self) -> u8 {
        self.s1
    }

    /// Returns the second needle given to `Two::new`.
    #[inline(always)]
    pub(crate) fn needle2(&self) -> u8 {
        self.s2
    }

    /// Return a pointer to the first occurrence of one of the needles in the
    /// given haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
    /// to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    #[inline(always)]
    pub(crate) unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::first_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        // Search a possibly unaligned chunk at `start`. This covers any part
        // of the haystack prior to where aligned loads can start.
        if let Some(cur) = self.search_chunk(start, topos) {
            return Some(cur);
        }
        // Set `cur` to the first V-aligned pointer greater than `start`.
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
        if len >= Self::LOOP_SIZE {
            while cur <= end.sub(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(V::BYTES));
                let eqa1 = self.v1.cmpeq(a);
                let eqb1 = self.v1.cmpeq(b);
                let eqa2 = self.v2.cmpeq(a);
                let eqb2 = self.v2.cmpeq(b);
                let or1 = eqa1.or(eqb1);
                let or2 = eqa2.or(eqb2);
                let or3 = or1.or(or2);
                if or3.movemask_will_have_non_zero() {
                    let mask = eqa1.movemask().or(eqa2.movemask());
                    if mask.has_non_zero() {
                        return Some(cur.add(topos(mask)));
                    }

                    let mask = eqb1.movemask().or(eqb2.movemask());
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(V::BYTES).add(topos(mask)));
                }
                cur = cur.add(Self::LOOP_SIZE);
            }
        }
        // Handle any leftovers after the aligned loop above. We use unaligned
        // loads here, but I believe we are guaranteed that they are aligned
        // since `cur` is aligned.
        while cur <= end.sub(V::BYTES) {
            debug_assert!(end.distance(cur) >= V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
            cur = cur.add(V::BYTES);
        }
        // Finally handle any remaining bytes less than the size of V. In this
        // case, our pointer may indeed be unaligned and the load may overlap
        // with the previous one. But that's okay since we know the previous
        // load didn't lead to a match (otherwise we wouldn't be here).
        if cur < end {
            debug_assert!(end.distance(cur) < V::BYTES);
            cur = cur.sub(V::BYTES - end.distance(cur));
            debug_assert_eq!(end.distance(cur), V::BYTES);
            return self.search_chunk(cur, topos);
        }
        None
    }

    /// Return a pointer to the last occurrence of one of the needles in the
    /// given haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
    /// to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    #[inline(always)]
    pub(crate) unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::last_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
            return Some(cur);
        }
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
        debug_assert!(start <= cur && cur <= end);
        if len >= Self::LOOP_SIZE {
            while cur >= start.add(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                cur = cur.sub(Self::LOOP_SIZE);
                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(V::BYTES));
                let eqa1 = self.v1.cmpeq(a);
                let eqb1 = self.v1.cmpeq(b);
                let eqa2 = self.v2.cmpeq(a);
                let eqb2 = self.v2.cmpeq(b);
                let or1 = eqa1.or(eqb1);
                let or2 = eqa2.or(eqb2);
                let or3 = or1.or(or2);
                if or3.movemask_will_have_non_zero() {
                    let mask = eqb1.movemask().or(eqb2.movemask());
                    if mask.has_non_zero() {
                        return Some(cur.add(V::BYTES).add(topos(mask)));
                    }

                    let mask = eqa1.movemask().or(eqa2.movemask());
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(topos(mask)));
                }
            }
        }
        while cur >= start.add(V::BYTES) {
            debug_assert!(cur.distance(start) >= V::BYTES);
            cur = cur.sub(V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
        }
        if cur > start {
            debug_assert!(cur.distance(start) < V::BYTES);
            return self.search_chunk(start, topos);
        }
        None
    }

    /// Search `V::BYTES` starting at `cur` via an unaligned load.
    ///
    /// `mask_to_offset` should be a function that converts a `movemask` to
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
    /// match location if one is found. Generally it is expected to be either
    /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on
    /// whether one is implementing a forward or reverse search, respectively.
    ///
    /// # Safety
    ///
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
    /// load of size `V::BYTES` at `cur`.
    #[inline(always)]
    unsafe fn search_chunk(
        &self,
        cur: *const u8,
        mask_to_offset: impl Fn(V::Mask) -> usize,
    ) -> Option<*const u8> {
        let chunk = V::load_unaligned(cur);
        let eq1 = self.v1.cmpeq(chunk);
        let eq2 = self.v2.cmpeq(chunk);
        let mask = eq1.or(eq2).movemask();
        if mask.has_non_zero() {
            let mask1 = eq1.movemask();
            let mask2 = eq2.movemask();
            Some(cur.add(mask_to_offset(mask1.or(mask2))))
        } else {
            None
        }
    }
}

/// Finds all occurrences of three bytes in a haystack.
///
/// That is, this reports matches of one of three possible bytes. For
/// example, searching for `a`, `b` or `o` in `afoobar` would report matches
/// at offsets `0`, `2`, `3`, `4` and `5`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Three<V> {
    s1: u8,
    s2: u8,
    s3: u8,
    v1: V,
    v2: V,
    v3: V,
}

impl<V: Vector> Three<V> {
    /// The number of bytes we examine per each iteration of our search loop.
    const LOOP_SIZE: usize = 2 * V::BYTES;

    /// Create a new searcher that finds occurrences of the bytes given.
    #[inline(always)]
    pub(crate) unsafe fn new(
        needle1: u8,
        needle2: u8,
        needle3: u8,
    ) -> Three<V> {
        Three {
            s1: needle1,
            s2: needle2,
            s3: needle3,
            v1: V::splat(needle1),
            v2: V::splat(needle2),
            v3: V::splat(needle3),
        }
    }

    /// Returns the first needle given to `Three::new`.
    #[inline(always)]
    pub(crate) fn needle1(&self) -> u8 {
        self.s1
    }

    /// Returns the second needle given to `Three::new`.
    #[inline(always)]
    pub(crate) fn needle2(&self) -> u8 {
        self.s2
    }

    /// Returns the third needle given to `Three::new`.
    #[inline(always)]
    pub(crate) fn needle3(&self) -> u8 {
        self.s3
    }

    /// Return a pointer to the first occurrence of one of the needles in the
    /// given haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
    /// to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    #[inline(always)]
    pub(crate) unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::first_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        // Search a possibly unaligned chunk at `start`. This covers any part
        // of the haystack prior to where aligned loads can start.
        if let Some(cur) = self.search_chunk(start, topos) {
            return Some(cur);
        }
        // Set `cur` to the first V-aligned pointer greater than `start`.
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
        if len >= Self::LOOP_SIZE {
            while cur <= end.sub(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(V::BYTES));
                let eqa1 = self.v1.cmpeq(a);
                let eqb1 = self.v1.cmpeq(b);
                let eqa2 = self.v2.cmpeq(a);
                let eqb2 = self.v2.cmpeq(b);
                let eqa3 = self.v3.cmpeq(a);
                let eqb3 = self.v3.cmpeq(b);
                let or1 = eqa1.or(eqb1);
                let or2 = eqa2.or(eqb2);
                let or3 = eqa3.or(eqb3);
                let or4 = or1.or(or2);
                let or5 = or3.or(or4);
                if or5.movemask_will_have_non_zero() {
                    let mask = eqa1
                        .movemask()
                        .or(eqa2.movemask())
                        .or(eqa3.movemask());
                    if mask.has_non_zero() {
                        return Some(cur.add(topos(mask)));
                    }

                    let mask = eqb1
                        .movemask()
                        .or(eqb2.movemask())
                        .or(eqb3.movemask());
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(V::BYTES).add(topos(mask)));
                }
                cur = cur.add(Self::LOOP_SIZE);
            }
        }
        // Handle any leftovers after the aligned loop above. We use unaligned
        // loads here, but I believe we are guaranteed that they are aligned
        // since `cur` is aligned.
        while cur <= end.sub(V::BYTES) {
            debug_assert!(end.distance(cur) >= V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
            cur = cur.add(V::BYTES);
        }
        // Finally handle any remaining bytes less than the size of V. In this
        // case, our pointer may indeed be unaligned and the load may overlap
        // with the previous one. But that's okay since we know the previous
        // load didn't lead to a match (otherwise we wouldn't be here).
        if cur < end {
            debug_assert!(end.distance(cur) < V::BYTES);
            cur = cur.sub(V::BYTES - end.distance(cur));
            debug_assert_eq!(end.distance(cur), V::BYTES);
            return self.search_chunk(cur, topos);
        }
        None
    }

    /// Return a pointer to the last occurrence of one of the needles in the
    /// given haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
    /// to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    #[inline(always)]
    pub(crate) unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::last_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
            return Some(cur);
        }
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
        debug_assert!(start <= cur && cur <= end);
        if len >= Self::LOOP_SIZE {
            while cur >= start.add(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                cur = cur.sub(Self::LOOP_SIZE);
                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(V::BYTES));
                let eqa1 = self.v1.cmpeq(a);
                let eqb1 = self.v1.cmpeq(b);
                let eqa2 = self.v2.cmpeq(a);
                let eqb2 = self.v2.cmpeq(b);
                let eqa3 = self.v3.cmpeq(a);
                let eqb3 = self.v3.cmpeq(b);
                let or1 = eqa1.or(eqb1);
                let or2 = eqa2.or(eqb2);
                let or3 = eqa3.or(eqb3);
                let or4 = or1.or(or2);
                let or5 = or3.or(or4);
                if or5.movemask_will_have_non_zero() {
                    let mask = eqb1
                        .movemask()
                        .or(eqb2.movemask())
                        .or(eqb3.movemask());
                    if mask.has_non_zero() {
                        return Some(cur.add(V::BYTES).add(topos(mask)));
                    }

                    let mask = eqa1
                        .movemask()
                        .or(eqa2.movemask())
                        .or(eqa3.movemask());
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(topos(mask)));
                }
            }
        }
        while cur >= start.add(V::BYTES) {
            debug_assert!(cur.distance(start) >= V::BYTES);
            cur = cur.sub(V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
        }
        if cur > start {
            debug_assert!(cur.distance(start) < V::BYTES);
            return self.search_chunk(start, topos);
        }
        None
    }

    /// Search `V::BYTES` starting at `cur` via an unaligned load.
    ///
    /// `mask_to_offset` should be a function that converts a `movemask` to
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
    /// match location if one is found. Generally it is expected to be either
    /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on
    /// whether one is implementing a forward or reverse search, respectively.
    ///
    /// # Safety
    ///
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
    /// load of size `V::BYTES` at `cur`.
    #[inline(always)]
    unsafe fn search_chunk(
        &self,
        cur: *const u8,
        mask_to_offset: impl Fn(V::Mask) -> usize,
    ) -> Option<*const u8> {
        let chunk = V::load_unaligned(cur);
        let eq1 = self.v1.cmpeq(chunk);
        let eq2 = self.v2.cmpeq(chunk);
        let eq3 = self.v3.cmpeq(chunk);
        let mask = eq1.or(eq2).or(eq3).movemask();
        if mask.has_non_zero() {
            let mask1 = eq1.movemask();
            let mask2 = eq2.movemask();
            let mask3 = eq3.movemask();
            Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3))))
        } else {
            None
        }
    }
}

/// An iterator over all occurrences of a set of bytes in a haystack.
///
/// This iterator implements the routines necessary to provide a
/// `DoubleEndedIterator` impl, which means it can also be used to find
/// occurrences in reverse order.
///
/// The lifetime parameters are as follows:
///
/// * `'h` refers to the lifetime of the haystack being searched.
///
/// This type is intended to be used to implement all iterators for the
/// `memchr` family of functions. It handles a tiny bit of marginally tricky
/// raw pointer math, but otherwise expects the caller to provide `find_raw`
/// and `rfind_raw` routines for each call of `next` and `next_back`,
/// respectively.
#[derive(Clone, Debug)]
pub(crate) struct Iter<'h> {
    /// The original starting point into the haystack. We use this to convert
    /// pointers to offsets.
    original_start: *const u8,
    /// The current starting point into the haystack. That is, where the next
    /// search will begin.
    start: *const u8,
    /// The current ending point into the haystack. That is, where the next
    /// reverse search will begin.
    end: *const u8,
    /// A marker for tracking the lifetime of the `original_start`, `start`
    /// and `end` pointers above, which all point into the haystack.
    haystack: core::marker::PhantomData<&'h [u8]>,
}

// SAFETY: Iter contains no shared references to anything that performs any
// interior mutations. Also, the lifetime guarantees that Iter will not
// outlive the haystack.
unsafe impl<'h> Send for Iter<'h> {}

// SAFETY: Iter performs no interior mutations, therefore no explicit
// synchronization is necessary. Also, the lifetime guarantees that Iter will
// not outlive the haystack.
unsafe impl<'h> Sync for Iter<'h> {}

impl<'h> Iter<'h> {
    /// Create a new generic memchr iterator.
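    ///
    /// A sketch of intended use (illustrative), assuming `finder` is one of
    /// the searchers above (e.g. `One`) and that the closure upholds `next`'s
    /// safety contract:
    ///
    /// ```ignore
    /// let mut it = Iter::new(haystack);
    /// // SAFETY: `find_raw` only returns pointers in `[start, end)`.
    /// while let Some(i) = unsafe { it.next(|s, e| finder.find_raw(s, e)) } {
    ///     // `i` is a valid index into `haystack`.
    /// }
    /// ```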
    #[inline(always)]
    pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> {
        Iter {
            original_start: haystack.as_ptr(),
            start: haystack.as_ptr(),
            end: haystack.as_ptr().wrapping_add(haystack.len()),
            haystack: core::marker::PhantomData,
        }
    }

    /// Returns the next occurrence in the forward direction.
    ///
    /// # Safety
    ///
    /// Callers must ensure that if a pointer is returned from the closure
    /// provided, then it must be greater than or equal to the start pointer
    /// and less than the end pointer.
    #[inline(always)]
    pub(crate) unsafe fn next(
        &mut self,
        mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
    ) -> Option<usize> {
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
        // We only ever modify start/end corresponding to a matching offset
        // found between start and end. Thus all changes to start/end maintain
        // our safety requirements.
        //
        // The only other assumption we rely on is that the pointer returned
        // by `find_raw` satisfies `self.start <= found < self.end`, and that
        // safety contract is forwarded to the caller.
        let found = find_raw(self.start, self.end)?;
        let result = found.distance(self.original_start);
        self.start = found.add(1);
        Some(result)
    }

    /// Returns the number of remaining elements in this iterator.
    #[inline(always)]
    pub(crate) fn count(
        self,
        mut count_raw: impl FnMut(*const u8, *const u8) -> usize,
    ) -> usize {
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
        // We only ever modify start/end corresponding to a matching offset
        // found between start and end. Thus all changes to start/end maintain
        // our safety requirements.
        count_raw(self.start, self.end)
    }

    /// Returns the next occurrence in reverse.
    ///
    /// # Safety
    ///
    /// Callers must ensure that if a pointer is returned from the closure
    /// provided, then it must be greater than or equal to the start pointer
    /// and less than the end pointer.
    #[inline(always)]
    pub(crate) unsafe fn next_back(
        &mut self,
        mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
    ) -> Option<usize> {
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
        // We only ever modify start/end corresponding to a matching offset
        // found between start and end. Thus all changes to start/end maintain
        // our safety requirements.
        //
        // The only other assumption we rely on is that the pointer returned
        // by `rfind_raw` satisfies `self.start <= found < self.end`, and that
        // safety contract is forwarded to the caller.
        let found = rfind_raw(self.start, self.end)?;
        let result = found.distance(self.original_start);
        self.end = found;
        Some(result)
    }

    /// Provides an implementation of `Iterator::size_hint`.
    #[inline(always)]
    pub(crate) fn size_hint(&self) -> (usize, Option<usize>) {
        (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize())))
    }
}

/// Search a slice using a function that operates on raw pointers.
///
/// Given a function to search a contiguous sequence of memory for the
/// location of a non-empty set of bytes, this will execute that search on a
/// slice of bytes. The pointer returned by the given function will be
/// converted to an offset relative to the starting point of the given slice.
/// That is, if a match is found, the offset returned by this routine is
/// guaranteed to be a valid index into `haystack`.
///
/// Callers may use this for a forward or reverse search.
///
/// # Safety
///
/// Callers must ensure that if a pointer is returned by `find_raw`, then the
/// pointer must be greater than or equal to the starting pointer and less
/// than the end pointer.
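///
/// A sketch of intended use (illustrative), again assuming `finder` is one
/// of the searchers above:
///
/// ```ignore
/// // SAFETY: `find_raw` only returns pointers in `[start, end)`.
/// let offset: Option<usize> = unsafe {
///     search_slice_with_raw(haystack, |s, e| finder.find_raw(s, e))
/// };
/// ```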
#[inline(always)]
pub(crate) unsafe fn search_slice_with_raw(
    haystack: &[u8],
    mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
) -> Option<usize> {
    // SAFETY: We rely on `find_raw` to return a correct and valid pointer,
    // but otherwise, `start` and `end` are valid due to the guarantees
    // provided by a &[u8].
    let start = haystack.as_ptr();
    let end = start.add(haystack.len());
    let found = find_raw(start, end)?;
    Some(found.distance(start))
}

/// Performs a forward byte-at-a-time loop until either `ptr >= end` or until
/// `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
/// returned. If the latter occurs, then the pointer at which `confirm`
/// returns `true` is returned.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start <= end`.
#[inline(always)]
pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> Option<*const u8> {
    debug_assert!(start <= end);
    let mut ptr = start;
    while ptr < end {
        if confirm(*ptr) {
            return Some(ptr);
        }
        ptr = ptr.offset(1);
    }
    None
}

/// Performs a reverse byte-at-a-time loop until either `ptr < start` or
/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None`
/// is returned. If the latter occurs, then the pointer at which `confirm`
/// returns `true` is returned.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start <= end`.
#[inline(always)]
pub(crate) unsafe fn rev_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> Option<*const u8> {
    debug_assert!(start <= end);

    let mut ptr = end;
    while ptr > start {
        ptr = ptr.offset(-1);
        if confirm(*ptr) {
            return Some(ptr);
        }
    }
    None
}

/// Performs a forward byte-at-a-time loop until `ptr >= end` and returns the
/// number of times `confirm(*ptr)` returns `true`.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start <= end`.
#[inline(always)]
pub(crate) unsafe fn count_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> usize {
    debug_assert!(start <= end);
    let mut ptr = start;
    let mut count = 0;
    while ptr < end {
        if confirm(*ptr) {
            count += 1;
        }
        ptr = ptr.offset(1);
    }
    count
}