1 | /*! |
2 | This module provides forward and reverse substring search routines. |
3 | |
4 | Unlike the standard library's substring search routines, these work on |
5 | arbitrary bytes. For all non-empty needles, these routines will report exactly |
6 | the same values as the corresponding routines in the standard library. For |
7 | the empty needle, the standard library reports matches only at valid UTF-8 |
8 | boundaries, where as these routines will report matches at every position. |
9 | |
10 | Other than being able to work on arbitrary bytes, the primary reason to prefer |
11 | these routines over the standard library routines is that these will generally |
12 | be faster. In some cases, significantly so. |
13 | |
14 | # Example: iterating over substring matches |
15 | |
16 | This example shows how to use [`find_iter`] to find occurrences of a substring |
17 | in a haystack. |
18 | |
19 | ``` |
20 | use memchr::memmem; |
21 | |
22 | let haystack = b"foo bar foo baz foo" ; |
23 | |
24 | let mut it = memmem::find_iter(haystack, "foo" ); |
25 | assert_eq!(Some(0), it.next()); |
26 | assert_eq!(Some(8), it.next()); |
27 | assert_eq!(Some(16), it.next()); |
28 | assert_eq!(None, it.next()); |
29 | ``` |
30 | |
31 | # Example: iterating over substring matches in reverse |
32 | |
33 | This example shows how to use [`rfind_iter`] to find occurrences of a substring |
34 | in a haystack starting from the end of the haystack. |
35 | |
36 | **NOTE:** This module does not implement double ended iterators, so reverse |
37 | searches aren't done by calling `rev` on a forward iterator. |
38 | |
39 | ``` |
40 | use memchr::memmem; |
41 | |
42 | let haystack = b"foo bar foo baz foo" ; |
43 | |
44 | let mut it = memmem::rfind_iter(haystack, "foo" ); |
45 | assert_eq!(Some(16), it.next()); |
46 | assert_eq!(Some(8), it.next()); |
47 | assert_eq!(Some(0), it.next()); |
48 | assert_eq!(None, it.next()); |
49 | ``` |
50 | |
51 | # Example: repeating a search for the same needle |
52 | |
53 | It may be possible for the overhead of constructing a substring searcher to be |
54 | measurable in some workloads. In cases where the same needle is used to search |
55 | many haystacks, it is possible to do construction once and thus to avoid it for |
56 | subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for |
57 | reverse searches). |
58 | |
59 | ``` |
60 | use memchr::memmem; |
61 | |
62 | let finder = memmem::Finder::new("foo" ); |
63 | |
64 | assert_eq!(Some(4), finder.find(b"baz foo quux" )); |
65 | assert_eq!(None, finder.find(b"quux baz bar" )); |
66 | ``` |
67 | */ |
68 | |
69 | pub use crate::memmem::searcher::PrefilterConfig as Prefilter; |
70 | |
71 | // This is exported here for use in the crate::arch::all::twoway |
72 | // implementation. This is essentially an abstraction breaker. Namely, the |
73 | // public API of twoway doesn't support providing a prefilter, but its crate |
74 | // internal API does. The main reason for this is that I didn't want to do the |
75 | // API design required to support it without a concrete use case. |
76 | pub(crate) use crate::memmem::searcher::Pre; |
77 | |
78 | use crate::{ |
79 | arch::all::{ |
80 | packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, |
81 | rabinkarp, |
82 | }, |
83 | cow::CowBytes, |
84 | memmem::searcher::{PrefilterState, Searcher, SearcherRev}, |
85 | }; |
86 | |
87 | mod searcher; |
88 | |
89 | /// Returns an iterator over all non-overlapping occurrences of a substring in |
90 | /// a haystack. |
91 | /// |
92 | /// # Complexity |
93 | /// |
94 | /// This routine is guaranteed to have worst case linear time complexity |
95 | /// with respect to both the needle and the haystack. That is, this runs |
96 | /// in `O(needle.len() + haystack.len())` time. |
97 | /// |
98 | /// This routine is also guaranteed to have worst case constant space |
99 | /// complexity. |
100 | /// |
101 | /// # Examples |
102 | /// |
103 | /// Basic usage: |
104 | /// |
105 | /// ``` |
106 | /// use memchr::memmem; |
107 | /// |
108 | /// let haystack = b"foo bar foo baz foo" ; |
109 | /// let mut it = memmem::find_iter(haystack, b"foo" ); |
110 | /// assert_eq!(Some(0), it.next()); |
111 | /// assert_eq!(Some(8), it.next()); |
112 | /// assert_eq!(Some(16), it.next()); |
113 | /// assert_eq!(None, it.next()); |
114 | /// ``` |
115 | #[inline ] |
116 | pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( |
117 | haystack: &'h [u8], |
118 | needle: &'n N, |
119 | ) -> FindIter<'h, 'n> { |
120 | FindIter::new(haystack, Finder::new(needle)) |
121 | } |
122 | |
123 | /// Returns a reverse iterator over all non-overlapping occurrences of a |
124 | /// substring in a haystack. |
125 | /// |
126 | /// # Complexity |
127 | /// |
128 | /// This routine is guaranteed to have worst case linear time complexity |
129 | /// with respect to both the needle and the haystack. That is, this runs |
130 | /// in `O(needle.len() + haystack.len())` time. |
131 | /// |
132 | /// This routine is also guaranteed to have worst case constant space |
133 | /// complexity. |
134 | /// |
135 | /// # Examples |
136 | /// |
137 | /// Basic usage: |
138 | /// |
139 | /// ``` |
140 | /// use memchr::memmem; |
141 | /// |
142 | /// let haystack = b"foo bar foo baz foo" ; |
143 | /// let mut it = memmem::rfind_iter(haystack, b"foo" ); |
144 | /// assert_eq!(Some(16), it.next()); |
145 | /// assert_eq!(Some(8), it.next()); |
146 | /// assert_eq!(Some(0), it.next()); |
147 | /// assert_eq!(None, it.next()); |
148 | /// ``` |
149 | #[inline ] |
150 | pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( |
151 | haystack: &'h [u8], |
152 | needle: &'n N, |
153 | ) -> FindRevIter<'h, 'n> { |
154 | FindRevIter::new(haystack, FinderRev::new(needle)) |
155 | } |
156 | |
157 | /// Returns the index of the first occurrence of the given needle. |
158 | /// |
159 | /// Note that if you're are searching for the same needle in many different |
160 | /// small haystacks, it may be faster to initialize a [`Finder`] once, |
161 | /// and reuse it for each search. |
162 | /// |
163 | /// # Complexity |
164 | /// |
165 | /// This routine is guaranteed to have worst case linear time complexity |
166 | /// with respect to both the needle and the haystack. That is, this runs |
167 | /// in `O(needle.len() + haystack.len())` time. |
168 | /// |
169 | /// This routine is also guaranteed to have worst case constant space |
170 | /// complexity. |
171 | /// |
172 | /// # Examples |
173 | /// |
174 | /// Basic usage: |
175 | /// |
176 | /// ``` |
177 | /// use memchr::memmem; |
178 | /// |
179 | /// let haystack = b"foo bar baz" ; |
180 | /// assert_eq!(Some(0), memmem::find(haystack, b"foo" )); |
181 | /// assert_eq!(Some(4), memmem::find(haystack, b"bar" )); |
182 | /// assert_eq!(None, memmem::find(haystack, b"quux" )); |
183 | /// ``` |
184 | #[inline ] |
185 | pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { |
186 | if haystack.len() < 64 { |
187 | rabinkarp::Finder::new(needle).find(haystack, needle) |
188 | } else { |
189 | Finder::new(needle).find(haystack) |
190 | } |
191 | } |
192 | |
193 | /// Returns the index of the last occurrence of the given needle. |
194 | /// |
195 | /// Note that if you're are searching for the same needle in many different |
196 | /// small haystacks, it may be faster to initialize a [`FinderRev`] once, |
197 | /// and reuse it for each search. |
198 | /// |
199 | /// # Complexity |
200 | /// |
201 | /// This routine is guaranteed to have worst case linear time complexity |
202 | /// with respect to both the needle and the haystack. That is, this runs |
203 | /// in `O(needle.len() + haystack.len())` time. |
204 | /// |
205 | /// This routine is also guaranteed to have worst case constant space |
206 | /// complexity. |
207 | /// |
208 | /// # Examples |
209 | /// |
210 | /// Basic usage: |
211 | /// |
212 | /// ``` |
213 | /// use memchr::memmem; |
214 | /// |
215 | /// let haystack = b"foo bar baz" ; |
216 | /// assert_eq!(Some(0), memmem::rfind(haystack, b"foo" )); |
217 | /// assert_eq!(Some(4), memmem::rfind(haystack, b"bar" )); |
218 | /// assert_eq!(Some(8), memmem::rfind(haystack, b"ba" )); |
219 | /// assert_eq!(None, memmem::rfind(haystack, b"quux" )); |
220 | /// ``` |
221 | #[inline ] |
222 | pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { |
223 | if haystack.len() < 64 { |
224 | rabinkarp::FinderRev::new(needle).rfind(haystack, needle) |
225 | } else { |
226 | FinderRev::new(needle).rfind(haystack) |
227 | } |
228 | } |
229 | |
230 | /// An iterator over non-overlapping substring matches. |
231 | /// |
232 | /// Matches are reported by the byte offset at which they begin. |
233 | /// |
234 | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the |
235 | /// needle. |
236 | #[derive(Debug)] |
237 | pub struct FindIter<'h, 'n> { |
238 | haystack: &'h [u8], |
239 | prestate: PrefilterState, |
240 | finder: Finder<'n>, |
241 | pos: usize, |
242 | } |
243 | |
244 | impl<'h, 'n> FindIter<'h, 'n> { |
245 | #[inline (always)] |
246 | pub(crate) fn new( |
247 | haystack: &'h [u8], |
248 | finder: Finder<'n>, |
249 | ) -> FindIter<'h, 'n> { |
250 | let prestate = PrefilterState::new(); |
251 | FindIter { haystack, prestate, finder, pos: 0 } |
252 | } |
253 | |
254 | /// Convert this iterator into its owned variant, such that it no longer |
255 | /// borrows the finder and needle. |
256 | /// |
257 | /// If this is already an owned iterator, then this is a no-op. Otherwise, |
258 | /// this copies the needle. |
259 | /// |
260 | /// This is only available when the `alloc` feature is enabled. |
261 | #[cfg (feature = "alloc" )] |
262 | #[inline ] |
263 | pub fn into_owned(self) -> FindIter<'h, 'static> { |
264 | FindIter { |
265 | haystack: self.haystack, |
266 | prestate: self.prestate, |
267 | finder: self.finder.into_owned(), |
268 | pos: self.pos, |
269 | } |
270 | } |
271 | } |
272 | |
273 | impl<'h, 'n> Iterator for FindIter<'h, 'n> { |
274 | type Item = usize; |
275 | |
276 | fn next(&mut self) -> Option<usize> { |
277 | let needle = self.finder.needle(); |
278 | let haystack = self.haystack.get(self.pos..)?; |
279 | let idx = |
280 | self.finder.searcher.find(&mut self.prestate, haystack, needle)?; |
281 | |
282 | let pos = self.pos + idx; |
283 | self.pos = pos + needle.len().max(1); |
284 | |
285 | Some(pos) |
286 | } |
287 | |
288 | fn size_hint(&self) -> (usize, Option<usize>) { |
289 | // The largest possible number of non-overlapping matches is the |
290 | // quotient of the haystack and the needle (or the length of the |
291 | // haystack, if the needle is empty) |
292 | match self.haystack.len().checked_sub(self.pos) { |
293 | None => (0, Some(0)), |
294 | Some(haystack_len) => match self.finder.needle().len() { |
295 | // Empty needles always succeed and match at every point |
296 | // (including the very end) |
297 | 0 => ( |
298 | haystack_len.saturating_add(1), |
299 | haystack_len.checked_add(1), |
300 | ), |
301 | needle_len => (0, Some(haystack_len / needle_len)), |
302 | }, |
303 | } |
304 | } |
305 | } |
306 | |
307 | /// An iterator over non-overlapping substring matches in reverse. |
308 | /// |
309 | /// Matches are reported by the byte offset at which they begin. |
310 | /// |
311 | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the |
312 | /// needle. |
313 | #[derive(Debug)] |
314 | pub struct FindRevIter<'h, 'n> { |
315 | haystack: &'h [u8], |
316 | finder: FinderRev<'n>, |
317 | /// When searching with an empty needle, this gets set to `None` after |
318 | /// we've yielded the last element at `0`. |
319 | pos: Option<usize>, |
320 | } |
321 | |
322 | impl<'h, 'n> FindRevIter<'h, 'n> { |
323 | #[inline (always)] |
324 | pub(crate) fn new( |
325 | haystack: &'h [u8], |
326 | finder: FinderRev<'n>, |
327 | ) -> FindRevIter<'h, 'n> { |
328 | let pos = Some(haystack.len()); |
329 | FindRevIter { haystack, finder, pos } |
330 | } |
331 | |
332 | /// Convert this iterator into its owned variant, such that it no longer |
333 | /// borrows the finder and needle. |
334 | /// |
335 | /// If this is already an owned iterator, then this is a no-op. Otherwise, |
336 | /// this copies the needle. |
337 | /// |
338 | /// This is only available when the `std` feature is enabled. |
339 | #[cfg (feature = "alloc" )] |
340 | #[inline ] |
341 | pub fn into_owned(self) -> FindRevIter<'h, 'static> { |
342 | FindRevIter { |
343 | haystack: self.haystack, |
344 | finder: self.finder.into_owned(), |
345 | pos: self.pos, |
346 | } |
347 | } |
348 | } |
349 | |
350 | impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { |
351 | type Item = usize; |
352 | |
353 | fn next(&mut self) -> Option<usize> { |
354 | let pos = match self.pos { |
355 | None => return None, |
356 | Some(pos) => pos, |
357 | }; |
358 | let result = self.finder.rfind(&self.haystack[..pos]); |
359 | match result { |
360 | None => None, |
361 | Some(i) => { |
362 | if pos == i { |
363 | self.pos = pos.checked_sub(1); |
364 | } else { |
365 | self.pos = Some(i); |
366 | } |
367 | Some(i) |
368 | } |
369 | } |
370 | } |
371 | } |
372 | |
373 | /// A single substring searcher fixed to a particular needle. |
374 | /// |
375 | /// The purpose of this type is to permit callers to construct a substring |
376 | /// searcher that can be used to search haystacks without the overhead of |
377 | /// constructing the searcher in the first place. This is a somewhat niche |
378 | /// concern when it's necessary to re-use the same needle to search multiple |
379 | /// different haystacks with as little overhead as possible. In general, using |
380 | /// [`find`] is good enough, but `Finder` is useful when you can meaningfully |
381 | /// observe searcher construction time in a profile. |
382 | /// |
383 | /// When the `std` feature is enabled, then this type has an `into_owned` |
384 | /// version which permits building a `Finder` that is not connected to |
385 | /// the lifetime of its needle. |
386 | #[derive(Clone, Debug)] |
387 | pub struct Finder<'n> { |
388 | needle: CowBytes<'n>, |
389 | searcher: Searcher, |
390 | } |
391 | |
392 | impl<'n> Finder<'n> { |
393 | /// Create a new finder for the given needle. |
394 | #[inline ] |
395 | pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> { |
396 | FinderBuilder::new().build_forward(needle) |
397 | } |
398 | |
399 | /// Returns the index of the first occurrence of this needle in the given |
400 | /// haystack. |
401 | /// |
402 | /// # Complexity |
403 | /// |
404 | /// This routine is guaranteed to have worst case linear time complexity |
405 | /// with respect to both the needle and the haystack. That is, this runs |
406 | /// in `O(needle.len() + haystack.len())` time. |
407 | /// |
408 | /// This routine is also guaranteed to have worst case constant space |
409 | /// complexity. |
410 | /// |
411 | /// # Examples |
412 | /// |
413 | /// Basic usage: |
414 | /// |
415 | /// ``` |
416 | /// use memchr::memmem::Finder; |
417 | /// |
418 | /// let haystack = b"foo bar baz" ; |
419 | /// assert_eq!(Some(0), Finder::new("foo" ).find(haystack)); |
420 | /// assert_eq!(Some(4), Finder::new("bar" ).find(haystack)); |
421 | /// assert_eq!(None, Finder::new("quux" ).find(haystack)); |
422 | /// ``` |
423 | #[inline ] |
424 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
425 | let mut prestate = PrefilterState::new(); |
426 | let needle = self.needle.as_slice(); |
427 | self.searcher.find(&mut prestate, haystack, needle) |
428 | } |
429 | |
430 | /// Returns an iterator over all occurrences of a substring in a haystack. |
431 | /// |
432 | /// # Complexity |
433 | /// |
434 | /// This routine is guaranteed to have worst case linear time complexity |
435 | /// with respect to both the needle and the haystack. That is, this runs |
436 | /// in `O(needle.len() + haystack.len())` time. |
437 | /// |
438 | /// This routine is also guaranteed to have worst case constant space |
439 | /// complexity. |
440 | /// |
441 | /// # Examples |
442 | /// |
443 | /// Basic usage: |
444 | /// |
445 | /// ``` |
446 | /// use memchr::memmem::Finder; |
447 | /// |
448 | /// let haystack = b"foo bar foo baz foo" ; |
449 | /// let finder = Finder::new(b"foo" ); |
450 | /// let mut it = finder.find_iter(haystack); |
451 | /// assert_eq!(Some(0), it.next()); |
452 | /// assert_eq!(Some(8), it.next()); |
453 | /// assert_eq!(Some(16), it.next()); |
454 | /// assert_eq!(None, it.next()); |
455 | /// ``` |
456 | #[inline ] |
457 | pub fn find_iter<'a, 'h>( |
458 | &'a self, |
459 | haystack: &'h [u8], |
460 | ) -> FindIter<'h, 'a> { |
461 | FindIter::new(haystack, self.as_ref()) |
462 | } |
463 | |
464 | /// Convert this finder into its owned variant, such that it no longer |
465 | /// borrows the needle. |
466 | /// |
467 | /// If this is already an owned finder, then this is a no-op. Otherwise, |
468 | /// this copies the needle. |
469 | /// |
470 | /// This is only available when the `alloc` feature is enabled. |
471 | #[cfg (feature = "alloc" )] |
472 | #[inline ] |
473 | pub fn into_owned(self) -> Finder<'static> { |
474 | Finder { |
475 | needle: self.needle.into_owned(), |
476 | searcher: self.searcher.clone(), |
477 | } |
478 | } |
479 | |
480 | /// Convert this finder into its borrowed variant. |
481 | /// |
482 | /// This is primarily useful if your finder is owned and you'd like to |
483 | /// store its borrowed variant in some intermediate data structure. |
484 | /// |
485 | /// Note that the lifetime parameter of the returned finder is tied to the |
486 | /// lifetime of `self`, and may be shorter than the `'n` lifetime of the |
487 | /// needle itself. Namely, a finder's needle can be either borrowed or |
488 | /// owned, so the lifetime of the needle returned must necessarily be the |
489 | /// shorter of the two. |
490 | #[inline ] |
491 | pub fn as_ref(&self) -> Finder<'_> { |
492 | Finder { |
493 | needle: CowBytes::new(self.needle()), |
494 | searcher: self.searcher.clone(), |
495 | } |
496 | } |
497 | |
498 | /// Returns the needle that this finder searches for. |
499 | /// |
500 | /// Note that the lifetime of the needle returned is tied to the lifetime |
501 | /// of the finder, and may be shorter than the `'n` lifetime. Namely, a |
502 | /// finder's needle can be either borrowed or owned, so the lifetime of the |
503 | /// needle returned must necessarily be the shorter of the two. |
504 | #[inline ] |
505 | pub fn needle(&self) -> &[u8] { |
506 | self.needle.as_slice() |
507 | } |
508 | } |
509 | |
510 | /// A single substring reverse searcher fixed to a particular needle. |
511 | /// |
512 | /// The purpose of this type is to permit callers to construct a substring |
513 | /// searcher that can be used to search haystacks without the overhead of |
514 | /// constructing the searcher in the first place. This is a somewhat niche |
515 | /// concern when it's necessary to re-use the same needle to search multiple |
516 | /// different haystacks with as little overhead as possible. In general, |
517 | /// using [`rfind`] is good enough, but `FinderRev` is useful when you can |
518 | /// meaningfully observe searcher construction time in a profile. |
519 | /// |
520 | /// When the `std` feature is enabled, then this type has an `into_owned` |
521 | /// version which permits building a `FinderRev` that is not connected to |
522 | /// the lifetime of its needle. |
523 | #[derive(Clone, Debug)] |
524 | pub struct FinderRev<'n> { |
525 | needle: CowBytes<'n>, |
526 | searcher: SearcherRev, |
527 | } |
528 | |
529 | impl<'n> FinderRev<'n> { |
530 | /// Create a new reverse finder for the given needle. |
531 | #[inline ] |
532 | pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> { |
533 | FinderBuilder::new().build_reverse(needle) |
534 | } |
535 | |
536 | /// Returns the index of the last occurrence of this needle in the given |
537 | /// haystack. |
538 | /// |
539 | /// The haystack may be any type that can be cheaply converted into a |
540 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
541 | /// |
542 | /// # Complexity |
543 | /// |
544 | /// This routine is guaranteed to have worst case linear time complexity |
545 | /// with respect to both the needle and the haystack. That is, this runs |
546 | /// in `O(needle.len() + haystack.len())` time. |
547 | /// |
548 | /// This routine is also guaranteed to have worst case constant space |
549 | /// complexity. |
550 | /// |
551 | /// # Examples |
552 | /// |
553 | /// Basic usage: |
554 | /// |
555 | /// ``` |
556 | /// use memchr::memmem::FinderRev; |
557 | /// |
558 | /// let haystack = b"foo bar baz" ; |
559 | /// assert_eq!(Some(0), FinderRev::new("foo" ).rfind(haystack)); |
560 | /// assert_eq!(Some(4), FinderRev::new("bar" ).rfind(haystack)); |
561 | /// assert_eq!(None, FinderRev::new("quux" ).rfind(haystack)); |
562 | /// ``` |
563 | pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { |
564 | self.searcher.rfind(haystack.as_ref(), self.needle.as_slice()) |
565 | } |
566 | |
567 | /// Returns a reverse iterator over all occurrences of a substring in a |
568 | /// haystack. |
569 | /// |
570 | /// # Complexity |
571 | /// |
572 | /// This routine is guaranteed to have worst case linear time complexity |
573 | /// with respect to both the needle and the haystack. That is, this runs |
574 | /// in `O(needle.len() + haystack.len())` time. |
575 | /// |
576 | /// This routine is also guaranteed to have worst case constant space |
577 | /// complexity. |
578 | /// |
579 | /// # Examples |
580 | /// |
581 | /// Basic usage: |
582 | /// |
583 | /// ``` |
584 | /// use memchr::memmem::FinderRev; |
585 | /// |
586 | /// let haystack = b"foo bar foo baz foo" ; |
587 | /// let finder = FinderRev::new(b"foo" ); |
588 | /// let mut it = finder.rfind_iter(haystack); |
589 | /// assert_eq!(Some(16), it.next()); |
590 | /// assert_eq!(Some(8), it.next()); |
591 | /// assert_eq!(Some(0), it.next()); |
592 | /// assert_eq!(None, it.next()); |
593 | /// ``` |
594 | #[inline ] |
595 | pub fn rfind_iter<'a, 'h>( |
596 | &'a self, |
597 | haystack: &'h [u8], |
598 | ) -> FindRevIter<'h, 'a> { |
599 | FindRevIter::new(haystack, self.as_ref()) |
600 | } |
601 | |
602 | /// Convert this finder into its owned variant, such that it no longer |
603 | /// borrows the needle. |
604 | /// |
605 | /// If this is already an owned finder, then this is a no-op. Otherwise, |
606 | /// this copies the needle. |
607 | /// |
608 | /// This is only available when the `std` feature is enabled. |
609 | #[cfg (feature = "alloc" )] |
610 | #[inline ] |
611 | pub fn into_owned(self) -> FinderRev<'static> { |
612 | FinderRev { |
613 | needle: self.needle.into_owned(), |
614 | searcher: self.searcher.clone(), |
615 | } |
616 | } |
617 | |
618 | /// Convert this finder into its borrowed variant. |
619 | /// |
620 | /// This is primarily useful if your finder is owned and you'd like to |
621 | /// store its borrowed variant in some intermediate data structure. |
622 | /// |
623 | /// Note that the lifetime parameter of the returned finder is tied to the |
624 | /// lifetime of `self`, and may be shorter than the `'n` lifetime of the |
625 | /// needle itself. Namely, a finder's needle can be either borrowed or |
626 | /// owned, so the lifetime of the needle returned must necessarily be the |
627 | /// shorter of the two. |
628 | #[inline ] |
629 | pub fn as_ref(&self) -> FinderRev<'_> { |
630 | FinderRev { |
631 | needle: CowBytes::new(self.needle()), |
632 | searcher: self.searcher.clone(), |
633 | } |
634 | } |
635 | |
636 | /// Returns the needle that this finder searches for. |
637 | /// |
638 | /// Note that the lifetime of the needle returned is tied to the lifetime |
639 | /// of the finder, and may be shorter than the `'n` lifetime. Namely, a |
640 | /// finder's needle can be either borrowed or owned, so the lifetime of the |
641 | /// needle returned must necessarily be the shorter of the two. |
642 | #[inline ] |
643 | pub fn needle(&self) -> &[u8] { |
644 | self.needle.as_slice() |
645 | } |
646 | } |
647 | |
648 | /// A builder for constructing non-default forward or reverse memmem finders. |
649 | /// |
650 | /// A builder is primarily useful for configuring a substring searcher. |
651 | /// Currently, the only configuration exposed is the ability to disable |
652 | /// heuristic prefilters used to speed up certain searches. |
653 | #[derive(Clone, Debug, Default)] |
654 | pub struct FinderBuilder { |
655 | prefilter: Prefilter, |
656 | } |
657 | |
658 | impl FinderBuilder { |
659 | /// Create a new finder builder with default settings. |
660 | pub fn new() -> FinderBuilder { |
661 | FinderBuilder::default() |
662 | } |
663 | |
664 | /// Build a forward finder using the given needle from the current |
665 | /// settings. |
666 | pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>( |
667 | &self, |
668 | needle: &'n B, |
669 | ) -> Finder<'n> { |
670 | self.build_forward_with_ranker(DefaultFrequencyRank, needle) |
671 | } |
672 | |
673 | /// Build a forward finder using the given needle and a custom heuristic for |
674 | /// determining the frequency of a given byte in the dataset. |
675 | /// See [`HeuristicFrequencyRank`] for more details. |
676 | pub fn build_forward_with_ranker< |
677 | 'n, |
678 | R: HeuristicFrequencyRank, |
679 | B: ?Sized + AsRef<[u8]>, |
680 | >( |
681 | &self, |
682 | ranker: R, |
683 | needle: &'n B, |
684 | ) -> Finder<'n> { |
685 | let needle = needle.as_ref(); |
686 | Finder { |
687 | needle: CowBytes::new(needle), |
688 | searcher: Searcher::new(self.prefilter, ranker, needle), |
689 | } |
690 | } |
691 | |
692 | /// Build a reverse finder using the given needle from the current |
693 | /// settings. |
694 | pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>( |
695 | &self, |
696 | needle: &'n B, |
697 | ) -> FinderRev<'n> { |
698 | let needle = needle.as_ref(); |
699 | FinderRev { |
700 | needle: CowBytes::new(needle), |
701 | searcher: SearcherRev::new(needle), |
702 | } |
703 | } |
704 | |
705 | /// Configure the prefilter setting for the finder. |
706 | /// |
707 | /// See the documentation for [`Prefilter`] for more discussion on why |
708 | /// you might want to configure this. |
709 | pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { |
710 | self.prefilter = prefilter; |
711 | self |
712 | } |
713 | } |
714 | |
715 | #[cfg (test)] |
716 | mod tests { |
717 | use super::*; |
718 | |
719 | define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h))); |
720 | define_substring_reverse_quickcheck!(|h, n| Some( |
721 | FinderRev::new(n).rfind(h) |
722 | )); |
723 | |
724 | #[test] |
725 | fn forward() { |
726 | crate::tests::substring::Runner::new() |
727 | .fwd(|h, n| Some(Finder::new(n).find(h))) |
728 | .run(); |
729 | } |
730 | |
731 | #[test] |
732 | fn reverse() { |
733 | crate::tests::substring::Runner::new() |
734 | .rev(|h, n| Some(FinderRev::new(n).rfind(h))) |
735 | .run(); |
736 | } |
737 | } |
738 | |