1 | /*! |
2 | This module provides forward and reverse substring search routines. |
3 | |
4 | Unlike the standard library's substring search routines, these work on |
5 | arbitrary bytes. For all non-empty needles, these routines will report exactly |
6 | the same values as the corresponding routines in the standard library. For |
7 | the empty needle, the standard library reports matches only at valid UTF-8 |
8 | boundaries, where as these routines will report matches at every position. |
9 | |
10 | Other than being able to work on arbitrary bytes, the primary reason to prefer |
11 | these routines over the standard library routines is that these will generally |
12 | be faster. In some cases, significantly so. |
13 | |
14 | # Example: iterating over substring matches |
15 | |
16 | This example shows how to use [`find_iter`] to find occurrences of a substring |
17 | in a haystack. |
18 | |
19 | ``` |
20 | use memchr::memmem; |
21 | |
22 | let haystack = b"foo bar foo baz foo" ; |
23 | |
24 | let mut it = memmem::find_iter(haystack, "foo" ); |
25 | assert_eq!(Some(0), it.next()); |
26 | assert_eq!(Some(8), it.next()); |
27 | assert_eq!(Some(16), it.next()); |
28 | assert_eq!(None, it.next()); |
29 | ``` |
30 | |
31 | # Example: iterating over substring matches in reverse |
32 | |
33 | This example shows how to use [`rfind_iter`] to find occurrences of a substring |
34 | in a haystack starting from the end of the haystack. |
35 | |
36 | **NOTE:** This module does not implement double ended iterators, so reverse |
37 | searches aren't done by calling `rev` on a forward iterator. |
38 | |
39 | ``` |
40 | use memchr::memmem; |
41 | |
42 | let haystack = b"foo bar foo baz foo" ; |
43 | |
44 | let mut it = memmem::rfind_iter(haystack, "foo" ); |
45 | assert_eq!(Some(16), it.next()); |
46 | assert_eq!(Some(8), it.next()); |
47 | assert_eq!(Some(0), it.next()); |
48 | assert_eq!(None, it.next()); |
49 | ``` |
50 | |
51 | # Example: repeating a search for the same needle |
52 | |
53 | It may be possible for the overhead of constructing a substring searcher to be |
54 | measurable in some workloads. In cases where the same needle is used to search |
55 | many haystacks, it is possible to do construction once and thus to avoid it for |
56 | subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for |
57 | reverse searches). |
58 | |
59 | ``` |
60 | use memchr::memmem; |
61 | |
62 | let finder = memmem::Finder::new("foo" ); |
63 | |
64 | assert_eq!(Some(4), finder.find(b"baz foo quux" )); |
65 | assert_eq!(None, finder.find(b"quux baz bar" )); |
66 | ``` |
67 | */ |
68 | |
69 | pub use self::prefilter::Prefilter; |
70 | |
71 | use crate::{ |
72 | cow::CowBytes, |
73 | memmem::{ |
74 | prefilter::{Pre, PrefilterFn, PrefilterState}, |
75 | rabinkarp::NeedleHash, |
76 | rarebytes::RareNeedleBytes, |
77 | }, |
78 | }; |
79 | |
80 | /// Defines a suite of quickcheck properties for forward and reverse |
81 | /// substring searching. |
82 | /// |
83 | /// This is defined in this specific spot so that it can be used freely among |
84 | /// the different substring search implementations. I couldn't be bothered to |
85 | /// fight with the macro-visibility rules enough to figure out how to stuff it |
86 | /// somewhere more convenient. |
87 | #[cfg (all(test, feature = "std" ))] |
88 | macro_rules! define_memmem_quickcheck_tests { |
89 | ($fwd:expr, $rev:expr) => { |
90 | use crate::memmem::proptests; |
91 | |
92 | quickcheck::quickcheck! { |
93 | fn qc_fwd_prefix_is_substring(bs: Vec<u8>) -> bool { |
94 | proptests::prefix_is_substring(false, &bs, $fwd) |
95 | } |
96 | |
97 | fn qc_fwd_suffix_is_substring(bs: Vec<u8>) -> bool { |
98 | proptests::suffix_is_substring(false, &bs, $fwd) |
99 | } |
100 | |
101 | fn qc_fwd_matches_naive( |
102 | haystack: Vec<u8>, |
103 | needle: Vec<u8> |
104 | ) -> bool { |
105 | proptests::matches_naive(false, &haystack, &needle, $fwd) |
106 | } |
107 | |
108 | fn qc_rev_prefix_is_substring(bs: Vec<u8>) -> bool { |
109 | proptests::prefix_is_substring(true, &bs, $rev) |
110 | } |
111 | |
112 | fn qc_rev_suffix_is_substring(bs: Vec<u8>) -> bool { |
113 | proptests::suffix_is_substring(true, &bs, $rev) |
114 | } |
115 | |
116 | fn qc_rev_matches_naive( |
117 | haystack: Vec<u8>, |
118 | needle: Vec<u8> |
119 | ) -> bool { |
120 | proptests::matches_naive(true, &haystack, &needle, $rev) |
121 | } |
122 | } |
123 | }; |
124 | } |
125 | |
126 | /// Defines a suite of "simple" hand-written tests for a substring |
127 | /// implementation. |
128 | /// |
129 | /// This is defined here for the same reason that |
130 | /// define_memmem_quickcheck_tests is defined here. |
131 | #[cfg (test)] |
132 | macro_rules! define_memmem_simple_tests { |
133 | ($fwd:expr, $rev:expr) => { |
134 | use crate::memmem::testsimples; |
135 | |
136 | #[test] |
137 | fn simple_forward() { |
138 | testsimples::run_search_tests_fwd($fwd); |
139 | } |
140 | |
141 | #[test] |
142 | fn simple_reverse() { |
143 | testsimples::run_search_tests_rev($rev); |
144 | } |
145 | }; |
146 | } |
147 | |
148 | mod byte_frequencies; |
149 | #[cfg (memchr_runtime_simd)] |
150 | mod genericsimd; |
151 | mod prefilter; |
152 | mod rabinkarp; |
153 | mod rarebytes; |
154 | mod twoway; |
155 | mod util; |
156 | #[cfg (memchr_runtime_simd)] |
157 | mod vector; |
158 | #[cfg (all(memchr_runtime_wasm128))] |
159 | mod wasm; |
160 | #[cfg (all(not(miri), target_arch = "x86_64" , memchr_runtime_simd))] |
161 | mod x86; |
162 | |
163 | /// Returns an iterator over all non-overlapping occurrences of a substring in |
164 | /// a haystack. |
165 | /// |
166 | /// # Complexity |
167 | /// |
168 | /// This routine is guaranteed to have worst case linear time complexity |
169 | /// with respect to both the needle and the haystack. That is, this runs |
170 | /// in `O(needle.len() + haystack.len())` time. |
171 | /// |
172 | /// This routine is also guaranteed to have worst case constant space |
173 | /// complexity. |
174 | /// |
175 | /// # Examples |
176 | /// |
177 | /// Basic usage: |
178 | /// |
179 | /// ``` |
180 | /// use memchr::memmem; |
181 | /// |
182 | /// let haystack = b"foo bar foo baz foo" ; |
183 | /// let mut it = memmem::find_iter(haystack, b"foo" ); |
184 | /// assert_eq!(Some(0), it.next()); |
185 | /// assert_eq!(Some(8), it.next()); |
186 | /// assert_eq!(Some(16), it.next()); |
187 | /// assert_eq!(None, it.next()); |
188 | /// ``` |
189 | #[inline ] |
190 | pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( |
191 | haystack: &'h [u8], |
192 | needle: &'n N, |
193 | ) -> FindIter<'h, 'n> { |
194 | FindIter::new(haystack, Finder::new(needle)) |
195 | } |
196 | |
197 | /// Returns a reverse iterator over all non-overlapping occurrences of a |
198 | /// substring in a haystack. |
199 | /// |
200 | /// # Complexity |
201 | /// |
202 | /// This routine is guaranteed to have worst case linear time complexity |
203 | /// with respect to both the needle and the haystack. That is, this runs |
204 | /// in `O(needle.len() + haystack.len())` time. |
205 | /// |
206 | /// This routine is also guaranteed to have worst case constant space |
207 | /// complexity. |
208 | /// |
209 | /// # Examples |
210 | /// |
211 | /// Basic usage: |
212 | /// |
213 | /// ``` |
214 | /// use memchr::memmem; |
215 | /// |
216 | /// let haystack = b"foo bar foo baz foo" ; |
217 | /// let mut it = memmem::rfind_iter(haystack, b"foo" ); |
218 | /// assert_eq!(Some(16), it.next()); |
219 | /// assert_eq!(Some(8), it.next()); |
220 | /// assert_eq!(Some(0), it.next()); |
221 | /// assert_eq!(None, it.next()); |
222 | /// ``` |
223 | #[inline ] |
224 | pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( |
225 | haystack: &'h [u8], |
226 | needle: &'n N, |
227 | ) -> FindRevIter<'h, 'n> { |
228 | FindRevIter::new(haystack, finder:FinderRev::new(needle)) |
229 | } |
230 | |
231 | /// Returns the index of the first occurrence of the given needle. |
232 | /// |
233 | /// Note that if you're are searching for the same needle in many different |
234 | /// small haystacks, it may be faster to initialize a [`Finder`] once, |
235 | /// and reuse it for each search. |
236 | /// |
237 | /// # Complexity |
238 | /// |
239 | /// This routine is guaranteed to have worst case linear time complexity |
240 | /// with respect to both the needle and the haystack. That is, this runs |
241 | /// in `O(needle.len() + haystack.len())` time. |
242 | /// |
243 | /// This routine is also guaranteed to have worst case constant space |
244 | /// complexity. |
245 | /// |
246 | /// # Examples |
247 | /// |
248 | /// Basic usage: |
249 | /// |
250 | /// ``` |
251 | /// use memchr::memmem; |
252 | /// |
253 | /// let haystack = b"foo bar baz" ; |
254 | /// assert_eq!(Some(0), memmem::find(haystack, b"foo" )); |
255 | /// assert_eq!(Some(4), memmem::find(haystack, b"bar" )); |
256 | /// assert_eq!(None, memmem::find(haystack, b"quux" )); |
257 | /// ``` |
258 | #[inline ] |
259 | pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { |
260 | if haystack.len() < 64 { |
261 | rabinkarp::find(haystack, needle) |
262 | } else { |
263 | Finder::new(needle).find(haystack) |
264 | } |
265 | } |
266 | |
267 | /// Returns the index of the last occurrence of the given needle. |
268 | /// |
269 | /// Note that if you're are searching for the same needle in many different |
270 | /// small haystacks, it may be faster to initialize a [`FinderRev`] once, |
271 | /// and reuse it for each search. |
272 | /// |
273 | /// # Complexity |
274 | /// |
275 | /// This routine is guaranteed to have worst case linear time complexity |
276 | /// with respect to both the needle and the haystack. That is, this runs |
277 | /// in `O(needle.len() + haystack.len())` time. |
278 | /// |
279 | /// This routine is also guaranteed to have worst case constant space |
280 | /// complexity. |
281 | /// |
282 | /// # Examples |
283 | /// |
284 | /// Basic usage: |
285 | /// |
286 | /// ``` |
287 | /// use memchr::memmem; |
288 | /// |
289 | /// let haystack = b"foo bar baz" ; |
290 | /// assert_eq!(Some(0), memmem::rfind(haystack, b"foo" )); |
291 | /// assert_eq!(Some(4), memmem::rfind(haystack, b"bar" )); |
292 | /// assert_eq!(Some(8), memmem::rfind(haystack, b"ba" )); |
293 | /// assert_eq!(None, memmem::rfind(haystack, b"quux" )); |
294 | /// ``` |
295 | #[inline ] |
296 | pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { |
297 | if haystack.len() < 64 { |
298 | rabinkarp::rfind(haystack, needle) |
299 | } else { |
300 | FinderRev::new(needle).rfind(haystack) |
301 | } |
302 | } |
303 | |
304 | /// An iterator over non-overlapping substring matches. |
305 | /// |
306 | /// Matches are reported by the byte offset at which they begin. |
307 | /// |
308 | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the |
309 | /// needle. |
310 | #[derive (Debug)] |
311 | pub struct FindIter<'h, 'n> { |
312 | haystack: &'h [u8], |
313 | prestate: PrefilterState, |
314 | finder: Finder<'n>, |
315 | pos: usize, |
316 | } |
317 | |
318 | impl<'h, 'n> FindIter<'h, 'n> { |
319 | #[inline (always)] |
320 | pub(crate) fn new( |
321 | haystack: &'h [u8], |
322 | finder: Finder<'n>, |
323 | ) -> FindIter<'h, 'n> { |
324 | let prestate = finder.searcher.prefilter_state(); |
325 | FindIter { haystack, prestate, finder, pos: 0 } |
326 | } |
327 | |
328 | /// Convert this iterator into its owned variant, such that it no longer |
329 | /// borrows the finder and needle. |
330 | /// |
331 | /// If this is already an owned iterator, then this is a no-op. Otherwise, |
332 | /// this copies the needle. |
333 | /// |
334 | /// This is only available when the `std` feature is enabled. |
335 | #[cfg (feature = "std" )] |
336 | #[inline ] |
337 | pub fn into_owned(self) -> FindIter<'h, 'static> { |
338 | FindIter { |
339 | haystack: self.haystack, |
340 | prestate: self.prestate, |
341 | finder: self.finder.into_owned(), |
342 | pos: self.pos, |
343 | } |
344 | } |
345 | } |
346 | |
347 | impl<'h, 'n> Iterator for FindIter<'h, 'n> { |
348 | type Item = usize; |
349 | |
350 | fn next(&mut self) -> Option<usize> { |
351 | if self.pos > self.haystack.len() { |
352 | return None; |
353 | } |
354 | let result: Option = self |
355 | .finder |
356 | .searcher |
357 | .find(&mut self.prestate, &self.haystack[self.pos..]); |
358 | match result { |
359 | None => None, |
360 | Some(i: usize) => { |
361 | let pos: usize = self.pos + i; |
362 | self.pos = pos + core::cmp::max(v1:1, self.finder.needle().len()); |
363 | Some(pos) |
364 | } |
365 | } |
366 | } |
367 | } |
368 | |
369 | /// An iterator over non-overlapping substring matches in reverse. |
370 | /// |
371 | /// Matches are reported by the byte offset at which they begin. |
372 | /// |
373 | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the |
374 | /// needle. |
375 | #[derive (Debug)] |
376 | pub struct FindRevIter<'h, 'n> { |
377 | haystack: &'h [u8], |
378 | finder: FinderRev<'n>, |
379 | /// When searching with an empty needle, this gets set to `None` after |
380 | /// we've yielded the last element at `0`. |
381 | pos: Option<usize>, |
382 | } |
383 | |
384 | impl<'h, 'n> FindRevIter<'h, 'n> { |
385 | #[inline (always)] |
386 | pub(crate) fn new( |
387 | haystack: &'h [u8], |
388 | finder: FinderRev<'n>, |
389 | ) -> FindRevIter<'h, 'n> { |
390 | let pos = Some(haystack.len()); |
391 | FindRevIter { haystack, finder, pos } |
392 | } |
393 | |
394 | /// Convert this iterator into its owned variant, such that it no longer |
395 | /// borrows the finder and needle. |
396 | /// |
397 | /// If this is already an owned iterator, then this is a no-op. Otherwise, |
398 | /// this copies the needle. |
399 | /// |
400 | /// This is only available when the `std` feature is enabled. |
401 | #[cfg (feature = "std" )] |
402 | #[inline ] |
403 | pub fn into_owned(self) -> FindRevIter<'h, 'static> { |
404 | FindRevIter { |
405 | haystack: self.haystack, |
406 | finder: self.finder.into_owned(), |
407 | pos: self.pos, |
408 | } |
409 | } |
410 | } |
411 | |
412 | impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { |
413 | type Item = usize; |
414 | |
415 | fn next(&mut self) -> Option<usize> { |
416 | let pos: usize = match self.pos { |
417 | None => return None, |
418 | Some(pos: usize) => pos, |
419 | }; |
420 | let result: Option = self.finder.rfind(&self.haystack[..pos]); |
421 | match result { |
422 | None => None, |
423 | Some(i: usize) => { |
424 | if pos == i { |
425 | self.pos = pos.checked_sub(1); |
426 | } else { |
427 | self.pos = Some(i); |
428 | } |
429 | Some(i) |
430 | } |
431 | } |
432 | } |
433 | } |
434 | |
435 | /// A single substring searcher fixed to a particular needle. |
436 | /// |
437 | /// The purpose of this type is to permit callers to construct a substring |
438 | /// searcher that can be used to search haystacks without the overhead of |
439 | /// constructing the searcher in the first place. This is a somewhat niche |
440 | /// concern when it's necessary to re-use the same needle to search multiple |
441 | /// different haystacks with as little overhead as possible. In general, using |
442 | /// [`find`] is good enough, but `Finder` is useful when you can meaningfully |
443 | /// observe searcher construction time in a profile. |
444 | /// |
445 | /// When the `std` feature is enabled, then this type has an `into_owned` |
446 | /// version which permits building a `Finder` that is not connected to |
447 | /// the lifetime of its needle. |
448 | #[derive (Clone, Debug)] |
449 | pub struct Finder<'n> { |
450 | searcher: Searcher<'n>, |
451 | } |
452 | |
453 | impl<'n> Finder<'n> { |
454 | /// Create a new finder for the given needle. |
455 | #[inline ] |
456 | pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> { |
457 | FinderBuilder::new().build_forward(needle) |
458 | } |
459 | |
460 | /// Returns the index of the first occurrence of this needle in the given |
461 | /// haystack. |
462 | /// |
463 | /// # Complexity |
464 | /// |
465 | /// This routine is guaranteed to have worst case linear time complexity |
466 | /// with respect to both the needle and the haystack. That is, this runs |
467 | /// in `O(needle.len() + haystack.len())` time. |
468 | /// |
469 | /// This routine is also guaranteed to have worst case constant space |
470 | /// complexity. |
471 | /// |
472 | /// # Examples |
473 | /// |
474 | /// Basic usage: |
475 | /// |
476 | /// ``` |
477 | /// use memchr::memmem::Finder; |
478 | /// |
479 | /// let haystack = b"foo bar baz" ; |
480 | /// assert_eq!(Some(0), Finder::new("foo" ).find(haystack)); |
481 | /// assert_eq!(Some(4), Finder::new("bar" ).find(haystack)); |
482 | /// assert_eq!(None, Finder::new("quux" ).find(haystack)); |
483 | /// ``` |
484 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
485 | self.searcher.find(&mut self.searcher.prefilter_state(), haystack) |
486 | } |
487 | |
488 | /// Returns an iterator over all occurrences of a substring in a haystack. |
489 | /// |
490 | /// # Complexity |
491 | /// |
492 | /// This routine is guaranteed to have worst case linear time complexity |
493 | /// with respect to both the needle and the haystack. That is, this runs |
494 | /// in `O(needle.len() + haystack.len())` time. |
495 | /// |
496 | /// This routine is also guaranteed to have worst case constant space |
497 | /// complexity. |
498 | /// |
499 | /// # Examples |
500 | /// |
501 | /// Basic usage: |
502 | /// |
503 | /// ``` |
504 | /// use memchr::memmem::Finder; |
505 | /// |
506 | /// let haystack = b"foo bar foo baz foo" ; |
507 | /// let finder = Finder::new(b"foo" ); |
508 | /// let mut it = finder.find_iter(haystack); |
509 | /// assert_eq!(Some(0), it.next()); |
510 | /// assert_eq!(Some(8), it.next()); |
511 | /// assert_eq!(Some(16), it.next()); |
512 | /// assert_eq!(None, it.next()); |
513 | /// ``` |
514 | #[inline ] |
515 | pub fn find_iter<'a, 'h>( |
516 | &'a self, |
517 | haystack: &'h [u8], |
518 | ) -> FindIter<'h, 'a> { |
519 | FindIter::new(haystack, self.as_ref()) |
520 | } |
521 | |
522 | /// Convert this finder into its owned variant, such that it no longer |
523 | /// borrows the needle. |
524 | /// |
525 | /// If this is already an owned finder, then this is a no-op. Otherwise, |
526 | /// this copies the needle. |
527 | /// |
528 | /// This is only available when the `std` feature is enabled. |
529 | #[cfg (feature = "std" )] |
530 | #[inline ] |
531 | pub fn into_owned(self) -> Finder<'static> { |
532 | Finder { searcher: self.searcher.into_owned() } |
533 | } |
534 | |
535 | /// Convert this finder into its borrowed variant. |
536 | /// |
537 | /// This is primarily useful if your finder is owned and you'd like to |
538 | /// store its borrowed variant in some intermediate data structure. |
539 | /// |
540 | /// Note that the lifetime parameter of the returned finder is tied to the |
541 | /// lifetime of `self`, and may be shorter than the `'n` lifetime of the |
542 | /// needle itself. Namely, a finder's needle can be either borrowed or |
543 | /// owned, so the lifetime of the needle returned must necessarily be the |
544 | /// shorter of the two. |
545 | #[inline ] |
546 | pub fn as_ref(&self) -> Finder<'_> { |
547 | Finder { searcher: self.searcher.as_ref() } |
548 | } |
549 | |
550 | /// Returns the needle that this finder searches for. |
551 | /// |
552 | /// Note that the lifetime of the needle returned is tied to the lifetime |
553 | /// of the finder, and may be shorter than the `'n` lifetime. Namely, a |
554 | /// finder's needle can be either borrowed or owned, so the lifetime of the |
555 | /// needle returned must necessarily be the shorter of the two. |
556 | #[inline ] |
557 | pub fn needle(&self) -> &[u8] { |
558 | self.searcher.needle() |
559 | } |
560 | } |
561 | |
562 | /// A single substring reverse searcher fixed to a particular needle. |
563 | /// |
564 | /// The purpose of this type is to permit callers to construct a substring |
565 | /// searcher that can be used to search haystacks without the overhead of |
566 | /// constructing the searcher in the first place. This is a somewhat niche |
567 | /// concern when it's necessary to re-use the same needle to search multiple |
568 | /// different haystacks with as little overhead as possible. In general, |
569 | /// using [`rfind`] is good enough, but `FinderRev` is useful when you can |
570 | /// meaningfully observe searcher construction time in a profile. |
571 | /// |
572 | /// When the `std` feature is enabled, then this type has an `into_owned` |
573 | /// version which permits building a `FinderRev` that is not connected to |
574 | /// the lifetime of its needle. |
575 | #[derive (Clone, Debug)] |
576 | pub struct FinderRev<'n> { |
577 | searcher: SearcherRev<'n>, |
578 | } |
579 | |
580 | impl<'n> FinderRev<'n> { |
581 | /// Create a new reverse finder for the given needle. |
582 | #[inline ] |
583 | pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> { |
584 | FinderBuilder::new().build_reverse(needle) |
585 | } |
586 | |
587 | /// Returns the index of the last occurrence of this needle in the given |
588 | /// haystack. |
589 | /// |
590 | /// The haystack may be any type that can be cheaply converted into a |
591 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
592 | /// |
593 | /// # Complexity |
594 | /// |
595 | /// This routine is guaranteed to have worst case linear time complexity |
596 | /// with respect to both the needle and the haystack. That is, this runs |
597 | /// in `O(needle.len() + haystack.len())` time. |
598 | /// |
599 | /// This routine is also guaranteed to have worst case constant space |
600 | /// complexity. |
601 | /// |
602 | /// # Examples |
603 | /// |
604 | /// Basic usage: |
605 | /// |
606 | /// ``` |
607 | /// use memchr::memmem::FinderRev; |
608 | /// |
609 | /// let haystack = b"foo bar baz" ; |
610 | /// assert_eq!(Some(0), FinderRev::new("foo" ).rfind(haystack)); |
611 | /// assert_eq!(Some(4), FinderRev::new("bar" ).rfind(haystack)); |
612 | /// assert_eq!(None, FinderRev::new("quux" ).rfind(haystack)); |
613 | /// ``` |
614 | pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { |
615 | self.searcher.rfind(haystack.as_ref()) |
616 | } |
617 | |
618 | /// Returns a reverse iterator over all occurrences of a substring in a |
619 | /// haystack. |
620 | /// |
621 | /// # Complexity |
622 | /// |
623 | /// This routine is guaranteed to have worst case linear time complexity |
624 | /// with respect to both the needle and the haystack. That is, this runs |
625 | /// in `O(needle.len() + haystack.len())` time. |
626 | /// |
627 | /// This routine is also guaranteed to have worst case constant space |
628 | /// complexity. |
629 | /// |
630 | /// # Examples |
631 | /// |
632 | /// Basic usage: |
633 | /// |
634 | /// ``` |
635 | /// use memchr::memmem::FinderRev; |
636 | /// |
637 | /// let haystack = b"foo bar foo baz foo" ; |
638 | /// let finder = FinderRev::new(b"foo" ); |
639 | /// let mut it = finder.rfind_iter(haystack); |
640 | /// assert_eq!(Some(16), it.next()); |
641 | /// assert_eq!(Some(8), it.next()); |
642 | /// assert_eq!(Some(0), it.next()); |
643 | /// assert_eq!(None, it.next()); |
644 | /// ``` |
645 | #[inline ] |
646 | pub fn rfind_iter<'a, 'h>( |
647 | &'a self, |
648 | haystack: &'h [u8], |
649 | ) -> FindRevIter<'h, 'a> { |
650 | FindRevIter::new(haystack, self.as_ref()) |
651 | } |
652 | |
653 | /// Convert this finder into its owned variant, such that it no longer |
654 | /// borrows the needle. |
655 | /// |
656 | /// If this is already an owned finder, then this is a no-op. Otherwise, |
657 | /// this copies the needle. |
658 | /// |
659 | /// This is only available when the `std` feature is enabled. |
660 | #[cfg (feature = "std" )] |
661 | #[inline ] |
662 | pub fn into_owned(self) -> FinderRev<'static> { |
663 | FinderRev { searcher: self.searcher.into_owned() } |
664 | } |
665 | |
666 | /// Convert this finder into its borrowed variant. |
667 | /// |
668 | /// This is primarily useful if your finder is owned and you'd like to |
669 | /// store its borrowed variant in some intermediate data structure. |
670 | /// |
671 | /// Note that the lifetime parameter of the returned finder is tied to the |
672 | /// lifetime of `self`, and may be shorter than the `'n` lifetime of the |
673 | /// needle itself. Namely, a finder's needle can be either borrowed or |
674 | /// owned, so the lifetime of the needle returned must necessarily be the |
675 | /// shorter of the two. |
676 | #[inline ] |
677 | pub fn as_ref(&self) -> FinderRev<'_> { |
678 | FinderRev { searcher: self.searcher.as_ref() } |
679 | } |
680 | |
681 | /// Returns the needle that this finder searches for. |
682 | /// |
683 | /// Note that the lifetime of the needle returned is tied to the lifetime |
684 | /// of the finder, and may be shorter than the `'n` lifetime. Namely, a |
685 | /// finder's needle can be either borrowed or owned, so the lifetime of the |
686 | /// needle returned must necessarily be the shorter of the two. |
687 | #[inline ] |
688 | pub fn needle(&self) -> &[u8] { |
689 | self.searcher.needle() |
690 | } |
691 | } |
692 | |
693 | /// A builder for constructing non-default forward or reverse memmem finders. |
694 | /// |
695 | /// A builder is primarily useful for configuring a substring searcher. |
696 | /// Currently, the only configuration exposed is the ability to disable |
697 | /// heuristic prefilters used to speed up certain searches. |
698 | #[derive (Clone, Debug, Default)] |
699 | pub struct FinderBuilder { |
700 | config: SearcherConfig, |
701 | } |
702 | |
703 | impl FinderBuilder { |
704 | /// Create a new finder builder with default settings. |
705 | pub fn new() -> FinderBuilder { |
706 | FinderBuilder::default() |
707 | } |
708 | |
709 | /// Build a forward finder using the given needle from the current |
710 | /// settings. |
711 | pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>( |
712 | &self, |
713 | needle: &'n B, |
714 | ) -> Finder<'n> { |
715 | Finder { searcher: Searcher::new(self.config, needle.as_ref()) } |
716 | } |
717 | |
718 | /// Build a reverse finder using the given needle from the current |
719 | /// settings. |
720 | pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>( |
721 | &self, |
722 | needle: &'n B, |
723 | ) -> FinderRev<'n> { |
724 | FinderRev { searcher: SearcherRev::new(needle.as_ref()) } |
725 | } |
726 | |
727 | /// Configure the prefilter setting for the finder. |
728 | /// |
729 | /// See the documentation for [`Prefilter`] for more discussion on why |
730 | /// you might want to configure this. |
731 | pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { |
732 | self.config.prefilter = prefilter; |
733 | self |
734 | } |
735 | } |
736 | |
737 | /// The internal implementation of a forward substring searcher. |
738 | /// |
739 | /// The reality is that this is a "meta" searcher. Namely, depending on a |
740 | /// variety of parameters (CPU support, target, needle size, haystack size and |
741 | /// even dynamic properties such as prefilter effectiveness), the actual |
742 | /// algorithm employed to do substring search may change. |
743 | #[derive (Clone, Debug)] |
744 | struct Searcher<'n> { |
745 | /// The actual needle we're searching for. |
746 | /// |
747 | /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is |
748 | /// specialized to a single variant (the borrowed form). |
749 | needle: CowBytes<'n>, |
750 | /// A collection of facts computed on the needle that are useful for more |
751 | /// than one substring search algorithm. |
752 | ninfo: NeedleInfo, |
753 | /// A prefilter function, if it was deemed appropriate. |
754 | /// |
755 | /// Some substring search implementations (like Two-Way) benefit greatly |
756 | /// if we can quickly find candidate starting positions for a match. |
757 | prefn: Option<PrefilterFn>, |
758 | /// The actual substring implementation in use. |
759 | kind: SearcherKind, |
760 | } |
761 | |
762 | /// A collection of facts computed about a search needle. |
763 | /// |
764 | /// We group these things together because it's useful to be able to hand them |
765 | /// to prefilters or substring algorithms that want them. |
766 | #[derive (Clone, Copy, Debug)] |
767 | pub(crate) struct NeedleInfo { |
768 | /// The offsets of "rare" bytes detected in the needle. |
769 | /// |
770 | /// This is meant to be a heuristic in order to maximize the effectiveness |
771 | /// of vectorized code. Namely, vectorized code tends to focus on only |
772 | /// one or two bytes. If we pick bytes from the needle that occur |
773 | /// infrequently, then more time will be spent in the vectorized code and |
774 | /// will likely make the overall search (much) faster. |
775 | /// |
776 | /// Of course, this is only a heuristic based on a background frequency |
777 | /// distribution of bytes. But it tends to work very well in practice. |
778 | pub(crate) rarebytes: RareNeedleBytes, |
779 | /// A Rabin-Karp hash of the needle. |
780 | /// |
781 | /// This is store here instead of in a more specific Rabin-Karp search |
782 | /// since Rabin-Karp may be used even if another SearchKind corresponds |
783 | /// to some other search implementation. e.g., If measurements suggest RK |
784 | /// is faster in some cases or if a search implementation can't handle |
785 | /// particularly small haystack. (Moreover, we cannot use RK *generally*, |
786 | /// since its worst case time is multiplicative. Instead, we only use it |
787 | /// some small haystacks, where "small" is a constant.) |
788 | pub(crate) nhash: NeedleHash, |
789 | } |
790 | |
791 | /// Configuration for substring search. |
792 | #[derive (Clone, Copy, Debug, Default)] |
793 | struct SearcherConfig { |
794 | /// This permits changing the behavior of the prefilter, since it can have |
795 | /// a variable impact on performance. |
796 | prefilter: Prefilter, |
797 | } |
798 | |
799 | #[derive (Clone, Debug)] |
800 | enum SearcherKind { |
801 | /// A special case for empty needles. An empty needle always matches, even |
802 | /// in an empty haystack. |
803 | Empty, |
804 | /// This is used whenever the needle is a single byte. In this case, we |
805 | /// always use memchr. |
806 | OneByte(u8), |
807 | /// Two-Way is the generic work horse and is what provides our additive |
808 | /// linear time guarantee. In general, it's used when the needle is bigger |
809 | /// than 8 bytes or so. |
810 | TwoWay(twoway::Forward), |
811 | #[cfg (all(not(miri), target_arch = "x86_64" , memchr_runtime_simd))] |
812 | GenericSIMD128(x86::sse::Forward), |
813 | #[cfg (memchr_runtime_wasm128)] |
814 | GenericSIMD128(wasm::Forward), |
815 | #[cfg (all(not(miri), target_arch = "x86_64" , memchr_runtime_simd))] |
816 | GenericSIMD256(x86::avx::Forward), |
817 | } |
818 | |
819 | impl<'n> Searcher<'n> { |
820 | fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { |
821 | use self::SearcherKind::*; |
822 | |
823 | let ninfo = NeedleInfo::new(needle); |
824 | let mk = |kind: SearcherKind| { |
825 | let prefn = prefilter::forward( |
826 | &config.prefilter, |
827 | &ninfo.rarebytes, |
828 | needle, |
829 | ); |
830 | Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } |
831 | }; |
832 | if needle.len() == 0 { |
833 | return mk(Empty); |
834 | } |
835 | if needle.len() == 1 { |
836 | return mk(OneByte(needle[0])); |
837 | } |
838 | #[cfg (all(not(miri), target_arch = "x86_64" , memchr_runtime_simd))] |
839 | { |
840 | if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) { |
841 | return mk(GenericSIMD256(fwd)); |
842 | } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) { |
843 | return mk(GenericSIMD128(fwd)); |
844 | } |
845 | } |
846 | #[cfg (all(target_arch = "wasm32" , memchr_runtime_simd))] |
847 | { |
848 | if let Some(fwd) = wasm::Forward::new(&ninfo, needle) { |
849 | return mk(GenericSIMD128(fwd)); |
850 | } |
851 | } |
852 | |
853 | mk(TwoWay(twoway::Forward::new(needle))) |
854 | } |
855 | |
856 | /// Return a fresh prefilter state that can be used with this searcher. |
857 | /// A prefilter state is used to track the effectiveness of a searcher's |
858 | /// prefilter for speeding up searches. Therefore, the prefilter state |
859 | /// should generally be reused on subsequent searches (such as in an |
860 | /// iterator). For searches on a different haystack, then a new prefilter |
861 | /// state should be used. |
862 | /// |
863 | /// This always initializes a valid (but possibly inert) prefilter state |
864 | /// even if this searcher does not have a prefilter enabled. |
865 | fn prefilter_state(&self) -> PrefilterState { |
866 | if self.prefn.is_none() { |
867 | PrefilterState::inert() |
868 | } else { |
869 | PrefilterState::new() |
870 | } |
871 | } |
872 | |
873 | fn needle(&self) -> &[u8] { |
874 | self.needle.as_slice() |
875 | } |
876 | |
877 | fn as_ref(&self) -> Searcher<'_> { |
878 | use self::SearcherKind::*; |
879 | |
880 | let kind = match self.kind { |
881 | Empty => Empty, |
882 | OneByte(b) => OneByte(b), |
883 | TwoWay(tw) => TwoWay(tw), |
884 | #[cfg (all(not(miri), memchr_runtime_simd))] |
885 | GenericSIMD128(gs) => GenericSIMD128(gs), |
886 | #[cfg (all( |
887 | not(miri), |
888 | target_arch = "x86_64" , |
889 | memchr_runtime_simd |
890 | ))] |
891 | GenericSIMD256(gs) => GenericSIMD256(gs), |
892 | }; |
893 | Searcher { |
894 | needle: CowBytes::new(self.needle()), |
895 | ninfo: self.ninfo, |
896 | prefn: self.prefn, |
897 | kind, |
898 | } |
899 | } |
900 | |
901 | #[cfg (feature = "std" )] |
902 | fn into_owned(self) -> Searcher<'static> { |
903 | use self::SearcherKind::*; |
904 | |
905 | let kind = match self.kind { |
906 | Empty => Empty, |
907 | OneByte(b) => OneByte(b), |
908 | TwoWay(tw) => TwoWay(tw), |
909 | #[cfg (all(not(miri), memchr_runtime_simd))] |
910 | GenericSIMD128(gs) => GenericSIMD128(gs), |
911 | #[cfg (all( |
912 | not(miri), |
913 | target_arch = "x86_64" , |
914 | memchr_runtime_simd |
915 | ))] |
916 | GenericSIMD256(gs) => GenericSIMD256(gs), |
917 | }; |
918 | Searcher { |
919 | needle: self.needle.into_owned(), |
920 | ninfo: self.ninfo, |
921 | prefn: self.prefn, |
922 | kind, |
923 | } |
924 | } |
925 | |
926 | /// Implements forward substring search by selecting the implementation |
927 | /// chosen at construction and executing it on the given haystack with the |
928 | /// prefilter's current state of effectiveness. |
929 | #[inline (always)] |
930 | fn find( |
931 | &self, |
932 | state: &mut PrefilterState, |
933 | haystack: &[u8], |
934 | ) -> Option<usize> { |
935 | use self::SearcherKind::*; |
936 | |
937 | let needle = self.needle(); |
938 | if haystack.len() < needle.len() { |
939 | return None; |
940 | } |
941 | match self.kind { |
942 | Empty => Some(0), |
943 | OneByte(b) => crate::memchr(b, haystack), |
944 | TwoWay(ref tw) => { |
945 | // For very short haystacks (e.g., where the prefilter probably |
946 | // can't run), it's faster to just run RK. |
947 | if rabinkarp::is_fast(haystack, needle) { |
948 | rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) |
949 | } else { |
950 | self.find_tw(tw, state, haystack, needle) |
951 | } |
952 | } |
953 | #[cfg (all(not(miri), memchr_runtime_simd))] |
954 | GenericSIMD128(ref gs) => { |
955 | // The SIMD matcher can't handle particularly short haystacks, |
956 | // so we fall back to RK in these cases. |
957 | if haystack.len() < gs.min_haystack_len() { |
958 | rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) |
959 | } else { |
960 | gs.find(haystack, needle) |
961 | } |
962 | } |
963 | #[cfg (all( |
964 | not(miri), |
965 | target_arch = "x86_64" , |
966 | memchr_runtime_simd |
967 | ))] |
968 | GenericSIMD256(ref gs) => { |
969 | // The SIMD matcher can't handle particularly short haystacks, |
970 | // so we fall back to RK in these cases. |
971 | if haystack.len() < gs.min_haystack_len() { |
972 | rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) |
973 | } else { |
974 | gs.find(haystack, needle) |
975 | } |
976 | } |
977 | } |
978 | } |
979 | |
980 | /// Calls Two-Way on the given haystack/needle. |
981 | /// |
982 | /// This is marked as unlineable since it seems to have a better overall |
983 | /// effect on benchmarks. However, this is one of those cases where |
984 | /// inlining it results an improvement in other benchmarks too, so I |
985 | /// suspect we just don't have enough data yet to make the right call here. |
986 | /// |
987 | /// I suspect the main problem is that this function contains two different |
988 | /// inlined copies of Two-Way: one with and one without prefilters enabled. |
989 | #[inline (never)] |
990 | fn find_tw( |
991 | &self, |
992 | tw: &twoway::Forward, |
993 | state: &mut PrefilterState, |
994 | haystack: &[u8], |
995 | needle: &[u8], |
996 | ) -> Option<usize> { |
997 | if let Some(prefn) = self.prefn { |
998 | // We used to look at the length of a haystack here. That is, if |
999 | // it was too small, then don't bother with the prefilter. But two |
1000 | // things changed: the prefilter falls back to memchr for small |
1001 | // haystacks, and, above, Rabin-Karp is employed for tiny haystacks |
1002 | // anyway. |
1003 | if state.is_effective() { |
1004 | let mut pre = Pre { state, prefn, ninfo: &self.ninfo }; |
1005 | return tw.find(Some(&mut pre), haystack, needle); |
1006 | } |
1007 | } |
1008 | tw.find(None, haystack, needle) |
1009 | } |
1010 | } |
1011 | |
1012 | impl NeedleInfo { |
1013 | pub(crate) fn new(needle: &[u8]) -> NeedleInfo { |
1014 | NeedleInfo { |
1015 | rarebytes: RareNeedleBytes::forward(needle), |
1016 | nhash: NeedleHash::forward(needle), |
1017 | } |
1018 | } |
1019 | } |
1020 | |
1021 | /// The internal implementation of a reverse substring searcher. |
1022 | /// |
1023 | /// See the forward searcher docs for more details. Currently, the reverse |
1024 | /// searcher is considerably simpler since it lacks prefilter support. This |
1025 | /// was done because it adds a lot of code, and more surface area to test. And |
1026 | /// in particular, it's not clear whether a prefilter on reverse searching is |
1027 | /// worth it. (If you have a compelling use case, please file an issue!) |
1028 | #[derive (Clone, Debug)] |
1029 | struct SearcherRev<'n> { |
1030 | /// The actual needle we're searching for. |
1031 | needle: CowBytes<'n>, |
1032 | /// A Rabin-Karp hash of the needle. |
1033 | nhash: NeedleHash, |
1034 | /// The actual substring implementation in use. |
1035 | kind: SearcherRevKind, |
1036 | } |
1037 | |
1038 | #[derive (Clone, Debug)] |
1039 | enum SearcherRevKind { |
1040 | /// A special case for empty needles. An empty needle always matches, even |
1041 | /// in an empty haystack. |
1042 | Empty, |
1043 | /// This is used whenever the needle is a single byte. In this case, we |
1044 | /// always use memchr. |
1045 | OneByte(u8), |
1046 | /// Two-Way is the generic work horse and is what provides our additive |
1047 | /// linear time guarantee. In general, it's used when the needle is bigger |
1048 | /// than 8 bytes or so. |
1049 | TwoWay(twoway::Reverse), |
1050 | } |
1051 | |
1052 | impl<'n> SearcherRev<'n> { |
1053 | fn new(needle: &'n [u8]) -> SearcherRev<'n> { |
1054 | use self::SearcherRevKind::*; |
1055 | |
1056 | let kind = if needle.len() == 0 { |
1057 | Empty |
1058 | } else if needle.len() == 1 { |
1059 | OneByte(needle[0]) |
1060 | } else { |
1061 | TwoWay(twoway::Reverse::new(needle)) |
1062 | }; |
1063 | SearcherRev { |
1064 | needle: CowBytes::new(needle), |
1065 | nhash: NeedleHash::reverse(needle), |
1066 | kind, |
1067 | } |
1068 | } |
1069 | |
1070 | fn needle(&self) -> &[u8] { |
1071 | self.needle.as_slice() |
1072 | } |
1073 | |
1074 | fn as_ref(&self) -> SearcherRev<'_> { |
1075 | use self::SearcherRevKind::*; |
1076 | |
1077 | let kind = match self.kind { |
1078 | Empty => Empty, |
1079 | OneByte(b) => OneByte(b), |
1080 | TwoWay(tw) => TwoWay(tw), |
1081 | }; |
1082 | SearcherRev { |
1083 | needle: CowBytes::new(self.needle()), |
1084 | nhash: self.nhash, |
1085 | kind, |
1086 | } |
1087 | } |
1088 | |
1089 | #[cfg (feature = "std" )] |
1090 | fn into_owned(self) -> SearcherRev<'static> { |
1091 | use self::SearcherRevKind::*; |
1092 | |
1093 | let kind = match self.kind { |
1094 | Empty => Empty, |
1095 | OneByte(b) => OneByte(b), |
1096 | TwoWay(tw) => TwoWay(tw), |
1097 | }; |
1098 | SearcherRev { |
1099 | needle: self.needle.into_owned(), |
1100 | nhash: self.nhash, |
1101 | kind, |
1102 | } |
1103 | } |
1104 | |
1105 | /// Implements reverse substring search by selecting the implementation |
1106 | /// chosen at construction and executing it on the given haystack with the |
1107 | /// prefilter's current state of effectiveness. |
1108 | #[inline (always)] |
1109 | fn rfind(&self, haystack: &[u8]) -> Option<usize> { |
1110 | use self::SearcherRevKind::*; |
1111 | |
1112 | let needle = self.needle(); |
1113 | if haystack.len() < needle.len() { |
1114 | return None; |
1115 | } |
1116 | match self.kind { |
1117 | Empty => Some(haystack.len()), |
1118 | OneByte(b) => crate::memrchr(b, haystack), |
1119 | TwoWay(ref tw) => { |
1120 | // For very short haystacks (e.g., where the prefilter probably |
1121 | // can't run), it's faster to just run RK. |
1122 | if rabinkarp::is_fast(haystack, needle) { |
1123 | rabinkarp::rfind_with(&self.nhash, haystack, needle) |
1124 | } else { |
1125 | tw.rfind(haystack, needle) |
1126 | } |
1127 | } |
1128 | } |
1129 | } |
1130 | } |
1131 | |
1132 | /// This module defines some generic quickcheck properties useful for testing |
1133 | /// any substring search algorithm. It also runs those properties for the |
1134 | /// top-level public API memmem routines. (The properties are also used to |
1135 | /// test various substring search implementations more granularly elsewhere as |
1136 | /// well.) |
1137 | #[cfg (all(test, feature = "std" , not(miri)))] |
1138 | mod proptests { |
1139 | // N.B. This defines the quickcheck tests using the properties defined |
1140 | // below. Because of macro-visibility weirdness, the actual macro is |
1141 | // defined at the top of this file. |
1142 | define_memmem_quickcheck_tests!(super::find, super::rfind); |
1143 | |
1144 | /// Check that every prefix of the given byte string is a substring. |
1145 | pub(crate) fn prefix_is_substring( |
1146 | reverse: bool, |
1147 | bs: &[u8], |
1148 | mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, |
1149 | ) -> bool { |
1150 | if bs.is_empty() { |
1151 | return true; |
1152 | } |
1153 | for i in 0..(bs.len() - 1) { |
1154 | let prefix = &bs[..i]; |
1155 | if reverse { |
1156 | assert_eq!(naive_rfind(bs, prefix), search(bs, prefix)); |
1157 | } else { |
1158 | assert_eq!(naive_find(bs, prefix), search(bs, prefix)); |
1159 | } |
1160 | } |
1161 | true |
1162 | } |
1163 | |
1164 | /// Check that every suffix of the given byte string is a substring. |
1165 | pub(crate) fn suffix_is_substring( |
1166 | reverse: bool, |
1167 | bs: &[u8], |
1168 | mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, |
1169 | ) -> bool { |
1170 | if bs.is_empty() { |
1171 | return true; |
1172 | } |
1173 | for i in 0..(bs.len() - 1) { |
1174 | let suffix = &bs[i..]; |
1175 | if reverse { |
1176 | assert_eq!(naive_rfind(bs, suffix), search(bs, suffix)); |
1177 | } else { |
1178 | assert_eq!(naive_find(bs, suffix), search(bs, suffix)); |
1179 | } |
1180 | } |
1181 | true |
1182 | } |
1183 | |
1184 | /// Check that naive substring search matches the result of the given search |
1185 | /// algorithm. |
1186 | pub(crate) fn matches_naive( |
1187 | reverse: bool, |
1188 | haystack: &[u8], |
1189 | needle: &[u8], |
1190 | mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, |
1191 | ) -> bool { |
1192 | if reverse { |
1193 | naive_rfind(haystack, needle) == search(haystack, needle) |
1194 | } else { |
1195 | naive_find(haystack, needle) == search(haystack, needle) |
1196 | } |
1197 | } |
1198 | |
1199 | /// Naively search forwards for the given needle in the given haystack. |
1200 | fn naive_find(haystack: &[u8], needle: &[u8]) -> Option<usize> { |
1201 | if needle.is_empty() { |
1202 | return Some(0); |
1203 | } else if haystack.len() < needle.len() { |
1204 | return None; |
1205 | } |
1206 | for i in 0..(haystack.len() - needle.len() + 1) { |
1207 | if needle == &haystack[i..i + needle.len()] { |
1208 | return Some(i); |
1209 | } |
1210 | } |
1211 | None |
1212 | } |
1213 | |
1214 | /// Naively search in reverse for the given needle in the given haystack. |
1215 | fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { |
1216 | if needle.is_empty() { |
1217 | return Some(haystack.len()); |
1218 | } else if haystack.len() < needle.len() { |
1219 | return None; |
1220 | } |
1221 | for i in (0..(haystack.len() - needle.len() + 1)).rev() { |
1222 | if needle == &haystack[i..i + needle.len()] { |
1223 | return Some(i); |
1224 | } |
1225 | } |
1226 | None |
1227 | } |
1228 | } |
1229 | |
1230 | /// This module defines some hand-written "simple" substring tests. It |
1231 | /// also provides routines for easily running them on any substring search |
1232 | /// implementation. |
1233 | #[cfg (test)] |
1234 | mod testsimples { |
1235 | define_memmem_simple_tests!(super::find, super::rfind); |
1236 | |
1237 | /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple. |
1238 | type SearchTest = |
1239 | (&'static str, &'static str, Option<usize>, Option<usize>); |
1240 | |
1241 | const SEARCH_TESTS: &'static [SearchTest] = &[ |
1242 | ("" , "" , Some(0), Some(0)), |
1243 | ("" , "a" , Some(0), Some(1)), |
1244 | ("" , "ab" , Some(0), Some(2)), |
1245 | ("" , "abc" , Some(0), Some(3)), |
1246 | ("a" , "" , None, None), |
1247 | ("a" , "a" , Some(0), Some(0)), |
1248 | ("a" , "aa" , Some(0), Some(1)), |
1249 | ("a" , "ba" , Some(1), Some(1)), |
1250 | ("a" , "bba" , Some(2), Some(2)), |
1251 | ("a" , "bbba" , Some(3), Some(3)), |
1252 | ("a" , "bbbab" , Some(3), Some(3)), |
1253 | ("a" , "bbbabb" , Some(3), Some(3)), |
1254 | ("a" , "bbbabbb" , Some(3), Some(3)), |
1255 | ("a" , "bbbbbb" , None, None), |
1256 | ("ab" , "" , None, None), |
1257 | ("ab" , "a" , None, None), |
1258 | ("ab" , "b" , None, None), |
1259 | ("ab" , "ab" , Some(0), Some(0)), |
1260 | ("ab" , "aab" , Some(1), Some(1)), |
1261 | ("ab" , "aaab" , Some(2), Some(2)), |
1262 | ("ab" , "abaab" , Some(0), Some(3)), |
1263 | ("ab" , "baaab" , Some(3), Some(3)), |
1264 | ("ab" , "acb" , None, None), |
1265 | ("ab" , "abba" , Some(0), Some(0)), |
1266 | ("abc" , "ab" , None, None), |
1267 | ("abc" , "abc" , Some(0), Some(0)), |
1268 | ("abc" , "abcz" , Some(0), Some(0)), |
1269 | ("abc" , "abczz" , Some(0), Some(0)), |
1270 | ("abc" , "zabc" , Some(1), Some(1)), |
1271 | ("abc" , "zzabc" , Some(2), Some(2)), |
1272 | ("abc" , "azbc" , None, None), |
1273 | ("abc" , "abzc" , None, None), |
1274 | ("abczdef" , "abczdefzzzzzzzzzzzzzzzzzzzz" , Some(0), Some(0)), |
1275 | ("abczdef" , "zzzzzzzzzzzzzzzzzzzzabczdef" , Some(20), Some(20)), |
1276 | ("xyz" , "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz" , Some(32), Some(32)), |
1277 | // Failures caught by quickcheck. |
1278 | (" \u{0}\u{15}" , " \u{0}\u{15}\u{15}\u{0}" , Some(0), Some(0)), |
1279 | (" \u{0}\u{1e}" , " \u{1e}\u{0}" , None, None), |
1280 | ]; |
1281 | |
1282 | /// Run the substring search tests. `search` should be a closure that |
1283 | /// accepts a haystack and a needle and returns the starting position |
1284 | /// of the first occurrence of needle in the haystack, or `None` if one |
1285 | /// doesn't exist. |
1286 | pub(crate) fn run_search_tests_fwd( |
1287 | mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, |
1288 | ) { |
1289 | for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS { |
1290 | let (n, h) = (needle.as_bytes(), haystack.as_bytes()); |
1291 | assert_eq!( |
1292 | expected_fwd, |
1293 | search(h, n), |
1294 | "needle: {:?}, haystack: {:?}, expected: {:?}" , |
1295 | n, |
1296 | h, |
1297 | expected_fwd |
1298 | ); |
1299 | } |
1300 | } |
1301 | |
1302 | /// Run the substring search tests. `search` should be a closure that |
1303 | /// accepts a haystack and a needle and returns the starting position of |
1304 | /// the last occurrence of needle in the haystack, or `None` if one doesn't |
1305 | /// exist. |
1306 | pub(crate) fn run_search_tests_rev( |
1307 | mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, |
1308 | ) { |
1309 | for &(needle, haystack, _, expected_rev) in SEARCH_TESTS { |
1310 | let (n, h) = (needle.as_bytes(), haystack.as_bytes()); |
1311 | assert_eq!( |
1312 | expected_rev, |
1313 | search(h, n), |
1314 | "needle: {:?}, haystack: {:?}, expected: {:?}" , |
1315 | n, |
1316 | h, |
1317 | expected_rev |
1318 | ); |
1319 | } |
1320 | } |
1321 | } |
1322 | |