1 | use alloc::sync::Arc; |
2 | |
3 | use crate::{ |
4 | packed::{pattern::Patterns, rabinkarp::RabinKarp, teddy}, |
5 | util::search::{Match, Span}, |
6 | }; |
7 | |
8 | /// This is a limit placed on the total number of patterns we're willing to try |
9 | /// and match at once. As more sophisticated algorithms are added, this number |
10 | /// may be increased. |
11 | const PATTERN_LIMIT: usize = 128; |
12 | |
13 | /// A knob for controlling the match semantics of a packed multiple string |
14 | /// searcher. |
15 | /// |
16 | /// This differs from the [`MatchKind`](crate::MatchKind) type in the top-level |
17 | /// crate module in that it doesn't support "standard" match semantics, |
18 | /// and instead only supports leftmost-first or leftmost-longest. Namely, |
19 | /// "standard" semantics cannot be easily supported by packed searchers. |
20 | /// |
21 | /// For more information on the distinction between leftmost-first and |
22 | /// leftmost-longest, see the docs on the top-level `MatchKind` type. |
23 | /// |
24 | /// Unlike the top-level `MatchKind` type, the default match semantics for this |
25 | /// type are leftmost-first. |
26 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] |
27 | #[non_exhaustive ] |
28 | pub enum MatchKind { |
29 | /// Use leftmost-first match semantics, which reports leftmost matches. |
30 | /// When there are multiple possible leftmost matches, the match |
31 | /// corresponding to the pattern that appeared earlier when constructing |
32 | /// the automaton is reported. |
33 | /// |
34 | /// This is the default. |
35 | LeftmostFirst, |
36 | /// Use leftmost-longest match semantics, which reports leftmost matches. |
37 | /// When there are multiple possible leftmost matches, the longest match |
38 | /// is chosen. |
39 | LeftmostLongest, |
40 | } |
41 | |
42 | impl Default for MatchKind { |
43 | fn default() -> MatchKind { |
44 | MatchKind::LeftmostFirst |
45 | } |
46 | } |
47 | |
48 | /// The configuration for a packed multiple pattern searcher. |
49 | /// |
50 | /// The configuration is currently limited only to being able to select the |
51 | /// match semantics (leftmost-first or leftmost-longest) of a searcher. In the |
52 | /// future, more knobs may be made available. |
53 | /// |
54 | /// A configuration produces a [`packed::Builder`](Builder), which in turn can |
55 | /// be used to construct a [`packed::Searcher`](Searcher) for searching. |
56 | /// |
57 | /// # Example |
58 | /// |
59 | /// This example shows how to use leftmost-longest semantics instead of the |
60 | /// default (leftmost-first). |
61 | /// |
62 | /// ``` |
63 | /// use aho_corasick::{packed::{Config, MatchKind}, PatternID}; |
64 | /// |
65 | /// # fn example() -> Option<()> { |
66 | /// let searcher = Config::new() |
67 | /// .match_kind(MatchKind::LeftmostLongest) |
68 | /// .builder() |
69 | /// .add("foo" ) |
70 | /// .add("foobar" ) |
71 | /// .build()?; |
72 | /// let matches: Vec<PatternID> = searcher |
73 | /// .find_iter("foobar" ) |
74 | /// .map(|mat| mat.pattern()) |
75 | /// .collect(); |
76 | /// assert_eq!(vec![PatternID::must(1)], matches); |
77 | /// # Some(()) } |
78 | /// # if cfg!(all(feature = "std" , any( |
79 | /// # target_arch = "x86_64" , target_arch = "aarch64" , |
80 | /// # ))) { |
81 | /// # example().unwrap() |
82 | /// # } else { |
83 | /// # assert!(example().is_none()); |
84 | /// # } |
85 | /// ``` |
86 | #[derive(Clone, Debug)] |
87 | pub struct Config { |
88 | kind: MatchKind, |
89 | force: Option<ForceAlgorithm>, |
90 | only_teddy_fat: Option<bool>, |
91 | only_teddy_256bit: Option<bool>, |
92 | heuristic_pattern_limits: bool, |
93 | } |
94 | |
95 | /// An internal option for forcing the use of a particular packed algorithm. |
96 | /// |
97 | /// When an algorithm is forced, if a searcher could not be constructed for it, |
98 | /// then no searcher will be returned even if an alternative algorithm would |
99 | /// work. |
100 | #[derive(Clone, Debug)] |
101 | enum ForceAlgorithm { |
102 | Teddy, |
103 | RabinKarp, |
104 | } |
105 | |
106 | impl Default for Config { |
107 | fn default() -> Config { |
108 | Config::new() |
109 | } |
110 | } |
111 | |
112 | impl Config { |
113 | /// Create a new default configuration. A default configuration uses |
114 | /// leftmost-first match semantics. |
115 | pub fn new() -> Config { |
116 | Config { |
117 | kind: MatchKind::LeftmostFirst, |
118 | force: None, |
119 | only_teddy_fat: None, |
120 | only_teddy_256bit: None, |
121 | heuristic_pattern_limits: true, |
122 | } |
123 | } |
124 | |
125 | /// Create a packed builder from this configuration. The builder can be |
126 | /// used to accumulate patterns and create a [`Searcher`] from them. |
127 | pub fn builder(&self) -> Builder { |
128 | Builder::from_config(self.clone()) |
129 | } |
130 | |
131 | /// Set the match semantics for this configuration. |
132 | pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config { |
133 | self.kind = kind; |
134 | self |
135 | } |
136 | |
137 | /// An undocumented method for forcing the use of the Teddy algorithm. |
138 | /// |
139 | /// This is only exposed for more precise testing and benchmarks. Callers |
140 | /// should not use it as it is not part of the API stability guarantees of |
141 | /// this crate. |
142 | #[doc (hidden)] |
143 | pub fn only_teddy(&mut self, yes: bool) -> &mut Config { |
144 | if yes { |
145 | self.force = Some(ForceAlgorithm::Teddy); |
146 | } else { |
147 | self.force = None; |
148 | } |
149 | self |
150 | } |
151 | |
152 | /// An undocumented method for forcing the use of the Fat Teddy algorithm. |
153 | /// |
154 | /// This is only exposed for more precise testing and benchmarks. Callers |
155 | /// should not use it as it is not part of the API stability guarantees of |
156 | /// this crate. |
157 | #[doc (hidden)] |
158 | pub fn only_teddy_fat(&mut self, yes: Option<bool>) -> &mut Config { |
159 | self.only_teddy_fat = yes; |
160 | self |
161 | } |
162 | |
163 | /// An undocumented method for forcing the use of SSE (`Some(false)`) or |
164 | /// AVX (`Some(true)`) algorithms. |
165 | /// |
166 | /// This is only exposed for more precise testing and benchmarks. Callers |
167 | /// should not use it as it is not part of the API stability guarantees of |
168 | /// this crate. |
169 | #[doc (hidden)] |
170 | pub fn only_teddy_256bit(&mut self, yes: Option<bool>) -> &mut Config { |
171 | self.only_teddy_256bit = yes; |
172 | self |
173 | } |
174 | |
175 | /// An undocumented method for forcing the use of the Rabin-Karp algorithm. |
176 | /// |
177 | /// This is only exposed for more precise testing and benchmarks. Callers |
178 | /// should not use it as it is not part of the API stability guarantees of |
179 | /// this crate. |
180 | #[doc (hidden)] |
181 | pub fn only_rabin_karp(&mut self, yes: bool) -> &mut Config { |
182 | if yes { |
183 | self.force = Some(ForceAlgorithm::RabinKarp); |
184 | } else { |
185 | self.force = None; |
186 | } |
187 | self |
188 | } |
189 | |
190 | /// Request that heuristic limitations on the number of patterns be |
191 | /// employed. This useful to disable for benchmarking where one wants to |
192 | /// explore how Teddy performs on large number of patterns even if the |
193 | /// heuristics would otherwise refuse construction. |
194 | /// |
195 | /// This is enabled by default. |
196 | pub fn heuristic_pattern_limits(&mut self, yes: bool) -> &mut Config { |
197 | self.heuristic_pattern_limits = yes; |
198 | self |
199 | } |
200 | } |
201 | |
202 | /// A builder for constructing a packed searcher from a collection of patterns. |
203 | /// |
204 | /// # Example |
205 | /// |
206 | /// This example shows how to use a builder to construct a searcher. By |
207 | /// default, leftmost-first match semantics are used. |
208 | /// |
209 | /// ``` |
210 | /// use aho_corasick::{packed::{Builder, MatchKind}, PatternID}; |
211 | /// |
212 | /// # fn example() -> Option<()> { |
213 | /// let searcher = Builder::new() |
214 | /// .add("foobar" ) |
215 | /// .add("foo" ) |
216 | /// .build()?; |
217 | /// let matches: Vec<PatternID> = searcher |
218 | /// .find_iter("foobar" ) |
219 | /// .map(|mat| mat.pattern()) |
220 | /// .collect(); |
221 | /// assert_eq!(vec![PatternID::ZERO], matches); |
222 | /// # Some(()) } |
223 | /// # if cfg!(all(feature = "std" , any( |
224 | /// # target_arch = "x86_64" , target_arch = "aarch64" , |
225 | /// # ))) { |
226 | /// # example().unwrap() |
227 | /// # } else { |
228 | /// # assert!(example().is_none()); |
229 | /// # } |
230 | /// ``` |
231 | #[derive(Clone, Debug)] |
232 | pub struct Builder { |
233 | /// The configuration of this builder and subsequent matcher. |
234 | config: Config, |
235 | /// Set to true if the builder detects that a matcher cannot be built. |
236 | inert: bool, |
237 | /// The patterns provided by the caller. |
238 | patterns: Patterns, |
239 | } |
240 | |
241 | impl Builder { |
242 | /// Create a new builder for constructing a multi-pattern searcher. This |
243 | /// constructor uses the default configuration. |
244 | pub fn new() -> Builder { |
245 | Builder::from_config(Config::new()) |
246 | } |
247 | |
248 | fn from_config(config: Config) -> Builder { |
249 | Builder { config, inert: false, patterns: Patterns::new() } |
250 | } |
251 | |
252 | /// Build a searcher from the patterns added to this builder so far. |
253 | pub fn build(&self) -> Option<Searcher> { |
254 | if self.inert || self.patterns.is_empty() { |
255 | return None; |
256 | } |
257 | let mut patterns = self.patterns.clone(); |
258 | patterns.set_match_kind(self.config.kind); |
259 | let patterns = Arc::new(patterns); |
260 | let rabinkarp = RabinKarp::new(&patterns); |
261 | // Effectively, we only want to return a searcher if we can use Teddy, |
262 | // since Teddy is our only fast packed searcher at the moment. |
263 | // Rabin-Karp is only used when searching haystacks smaller than what |
264 | // Teddy can support. Thus, the only way to get a Rabin-Karp searcher |
265 | // is to force it using undocumented APIs (for tests/benchmarks). |
266 | let (search_kind, minimum_len) = match self.config.force { |
267 | None | Some(ForceAlgorithm::Teddy) => { |
268 | debug!("trying to build Teddy packed matcher" ); |
269 | let teddy = match self.build_teddy(Arc::clone(&patterns)) { |
270 | None => return None, |
271 | Some(teddy) => teddy, |
272 | }; |
273 | let minimum_len = teddy.minimum_len(); |
274 | (SearchKind::Teddy(teddy), minimum_len) |
275 | } |
276 | Some(ForceAlgorithm::RabinKarp) => { |
277 | debug!("using Rabin-Karp packed matcher" ); |
278 | (SearchKind::RabinKarp, 0) |
279 | } |
280 | }; |
281 | Some(Searcher { patterns, rabinkarp, search_kind, minimum_len }) |
282 | } |
283 | |
284 | fn build_teddy(&self, patterns: Arc<Patterns>) -> Option<teddy::Searcher> { |
285 | teddy::Builder::new() |
286 | .only_256bit(self.config.only_teddy_256bit) |
287 | .only_fat(self.config.only_teddy_fat) |
288 | .heuristic_pattern_limits(self.config.heuristic_pattern_limits) |
289 | .build(patterns) |
290 | } |
291 | |
292 | /// Add the given pattern to this set to match. |
293 | /// |
294 | /// The order in which patterns are added is significant. Namely, when |
295 | /// using leftmost-first match semantics, then when multiple patterns can |
296 | /// match at a particular location, the pattern that was added first is |
297 | /// used as the match. |
298 | /// |
299 | /// If the number of patterns added exceeds the amount supported by packed |
300 | /// searchers, then the builder will stop accumulating patterns and render |
301 | /// itself inert. At this point, constructing a searcher will always return |
302 | /// `None`. |
303 | pub fn add<P: AsRef<[u8]>>(&mut self, pattern: P) -> &mut Builder { |
304 | if self.inert { |
305 | return self; |
306 | } else if self.patterns.len() >= PATTERN_LIMIT { |
307 | self.inert = true; |
308 | self.patterns.reset(); |
309 | return self; |
310 | } |
311 | // Just in case PATTERN_LIMIT increases beyond u16::MAX. |
312 | assert!(self.patterns.len() <= core::u16::MAX as usize); |
313 | |
314 | let pattern = pattern.as_ref(); |
315 | if pattern.is_empty() { |
316 | self.inert = true; |
317 | self.patterns.reset(); |
318 | return self; |
319 | } |
320 | self.patterns.add(pattern); |
321 | self |
322 | } |
323 | |
324 | /// Add the given iterator of patterns to this set to match. |
325 | /// |
326 | /// The iterator must yield elements that can be converted into a `&[u8]`. |
327 | /// |
328 | /// The order in which patterns are added is significant. Namely, when |
329 | /// using leftmost-first match semantics, then when multiple patterns can |
330 | /// match at a particular location, the pattern that was added first is |
331 | /// used as the match. |
332 | /// |
333 | /// If the number of patterns added exceeds the amount supported by packed |
334 | /// searchers, then the builder will stop accumulating patterns and render |
335 | /// itself inert. At this point, constructing a searcher will always return |
336 | /// `None`. |
337 | pub fn extend<I, P>(&mut self, patterns: I) -> &mut Builder |
338 | where |
339 | I: IntoIterator<Item = P>, |
340 | P: AsRef<[u8]>, |
341 | { |
342 | for p in patterns { |
343 | self.add(p); |
344 | } |
345 | self |
346 | } |
347 | |
348 | /// Returns the number of patterns added to this builder. |
349 | pub fn len(&self) -> usize { |
350 | self.patterns.len() |
351 | } |
352 | |
353 | /// Returns the length, in bytes, of the shortest pattern added. |
354 | pub fn minimum_len(&self) -> usize { |
355 | self.patterns.minimum_len() |
356 | } |
357 | } |
358 | |
359 | impl Default for Builder { |
360 | fn default() -> Builder { |
361 | Builder::new() |
362 | } |
363 | } |
364 | |
365 | /// A packed searcher for quickly finding occurrences of multiple patterns. |
366 | /// |
367 | /// If callers need more flexible construction, or if one wants to change the |
368 | /// match semantics (either leftmost-first or leftmost-longest), then one can |
369 | /// use the [`Config`] and/or [`Builder`] types for more fine grained control. |
370 | /// |
371 | /// # Example |
372 | /// |
373 | /// This example shows how to create a searcher from an iterator of patterns. |
374 | /// By default, leftmost-first match semantics are used. |
375 | /// |
376 | /// ``` |
377 | /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID}; |
378 | /// |
379 | /// # fn example() -> Option<()> { |
380 | /// let searcher = Searcher::new(["foobar" , "foo" ].iter().cloned())?; |
381 | /// let matches: Vec<PatternID> = searcher |
382 | /// .find_iter("foobar" ) |
383 | /// .map(|mat| mat.pattern()) |
384 | /// .collect(); |
385 | /// assert_eq!(vec![PatternID::ZERO], matches); |
386 | /// # Some(()) } |
387 | /// # if cfg!(all(feature = "std" , any( |
388 | /// # target_arch = "x86_64" , target_arch = "aarch64" , |
389 | /// # ))) { |
390 | /// # example().unwrap() |
391 | /// # } else { |
392 | /// # assert!(example().is_none()); |
393 | /// # } |
394 | /// ``` |
395 | #[derive(Clone, Debug)] |
396 | pub struct Searcher { |
397 | patterns: Arc<Patterns>, |
398 | rabinkarp: RabinKarp, |
399 | search_kind: SearchKind, |
400 | minimum_len: usize, |
401 | } |
402 | |
403 | #[derive(Clone, Debug)] |
404 | enum SearchKind { |
405 | Teddy(teddy::Searcher), |
406 | RabinKarp, |
407 | } |
408 | |
409 | impl Searcher { |
410 | /// A convenience function for constructing a searcher from an iterator |
411 | /// of things that can be converted to a `&[u8]`. |
412 | /// |
413 | /// If a searcher could not be constructed (either because of an |
414 | /// unsupported CPU or because there are too many patterns), then `None` |
415 | /// is returned. |
416 | /// |
417 | /// # Example |
418 | /// |
419 | /// Basic usage: |
420 | /// |
421 | /// ``` |
422 | /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID}; |
423 | /// |
424 | /// # fn example() -> Option<()> { |
425 | /// let searcher = Searcher::new(["foobar" , "foo" ].iter().cloned())?; |
426 | /// let matches: Vec<PatternID> = searcher |
427 | /// .find_iter("foobar" ) |
428 | /// .map(|mat| mat.pattern()) |
429 | /// .collect(); |
430 | /// assert_eq!(vec![PatternID::ZERO], matches); |
431 | /// # Some(()) } |
432 | /// # if cfg!(all(feature = "std" , any( |
433 | /// # target_arch = "x86_64" , target_arch = "aarch64" , |
434 | /// # ))) { |
435 | /// # example().unwrap() |
436 | /// # } else { |
437 | /// # assert!(example().is_none()); |
438 | /// # } |
439 | /// ``` |
440 | pub fn new<I, P>(patterns: I) -> Option<Searcher> |
441 | where |
442 | I: IntoIterator<Item = P>, |
443 | P: AsRef<[u8]>, |
444 | { |
445 | Builder::new().extend(patterns).build() |
446 | } |
447 | |
448 | /// A convenience function for calling `Config::new()`. |
449 | /// |
450 | /// This is useful for avoiding an additional import. |
451 | pub fn config() -> Config { |
452 | Config::new() |
453 | } |
454 | |
455 | /// A convenience function for calling `Builder::new()`. |
456 | /// |
457 | /// This is useful for avoiding an additional import. |
458 | pub fn builder() -> Builder { |
459 | Builder::new() |
460 | } |
461 | |
462 | /// Return the first occurrence of any of the patterns in this searcher, |
463 | /// according to its match semantics, in the given haystack. The `Match` |
464 | /// returned will include the identifier of the pattern that matched, which |
465 | /// corresponds to the index of the pattern (starting from `0`) in which it |
466 | /// was added. |
467 | /// |
468 | /// # Example |
469 | /// |
470 | /// Basic usage: |
471 | /// |
472 | /// ``` |
473 | /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID}; |
474 | /// |
475 | /// # fn example() -> Option<()> { |
476 | /// let searcher = Searcher::new(["foobar" , "foo" ].iter().cloned())?; |
477 | /// let mat = searcher.find("foobar" )?; |
478 | /// assert_eq!(PatternID::ZERO, mat.pattern()); |
479 | /// assert_eq!(0, mat.start()); |
480 | /// assert_eq!(6, mat.end()); |
481 | /// # Some(()) } |
482 | /// # if cfg!(all(feature = "std" , any( |
483 | /// # target_arch = "x86_64" , target_arch = "aarch64" , |
484 | /// # ))) { |
485 | /// # example().unwrap() |
486 | /// # } else { |
487 | /// # assert!(example().is_none()); |
488 | /// # } |
489 | /// ``` |
490 | #[inline ] |
491 | pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> { |
492 | let haystack = haystack.as_ref(); |
493 | self.find_in(haystack, Span::from(0..haystack.len())) |
494 | } |
495 | |
496 | /// Return the first occurrence of any of the patterns in this searcher, |
497 | /// according to its match semantics, in the given haystack starting from |
498 | /// the given position. |
499 | /// |
500 | /// The `Match` returned will include the identifier of the pattern that |
501 | /// matched, which corresponds to the index of the pattern (starting from |
502 | /// `0`) in which it was added. The offsets in the `Match` will be relative |
503 | /// to the start of `haystack` (and not `at`). |
504 | /// |
505 | /// # Example |
506 | /// |
507 | /// Basic usage: |
508 | /// |
509 | /// ``` |
510 | /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID, Span}; |
511 | /// |
512 | /// # fn example() -> Option<()> { |
513 | /// let haystack = "foofoobar" ; |
514 | /// let searcher = Searcher::new(["foobar" , "foo" ].iter().cloned())?; |
515 | /// let mat = searcher.find_in(haystack, Span::from(3..haystack.len()))?; |
516 | /// assert_eq!(PatternID::ZERO, mat.pattern()); |
517 | /// assert_eq!(3, mat.start()); |
518 | /// assert_eq!(9, mat.end()); |
519 | /// # Some(()) } |
520 | /// # if cfg!(all(feature = "std" , any( |
521 | /// # target_arch = "x86_64" , target_arch = "aarch64" , |
522 | /// # ))) { |
523 | /// # example().unwrap() |
524 | /// # } else { |
525 | /// # assert!(example().is_none()); |
526 | /// # } |
527 | /// ``` |
528 | #[inline ] |
529 | pub fn find_in<B: AsRef<[u8]>>( |
530 | &self, |
531 | haystack: B, |
532 | span: Span, |
533 | ) -> Option<Match> { |
534 | let haystack = haystack.as_ref(); |
535 | match self.search_kind { |
536 | SearchKind::Teddy(ref teddy) => { |
537 | if haystack[span].len() < teddy.minimum_len() { |
538 | return self.find_in_slow(haystack, span); |
539 | } |
540 | teddy.find(&haystack[..span.end], span.start) |
541 | } |
542 | SearchKind::RabinKarp => { |
543 | self.rabinkarp.find_at(&haystack[..span.end], span.start) |
544 | } |
545 | } |
546 | } |
547 | |
548 | /// Return an iterator of non-overlapping occurrences of the patterns in |
549 | /// this searcher, according to its match semantics, in the given haystack. |
550 | /// |
551 | /// # Example |
552 | /// |
553 | /// Basic usage: |
554 | /// |
555 | /// ``` |
556 | /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID}; |
557 | /// |
558 | /// # fn example() -> Option<()> { |
559 | /// let searcher = Searcher::new(["foobar" , "foo" ].iter().cloned())?; |
560 | /// let matches: Vec<PatternID> = searcher |
561 | /// .find_iter("foobar fooba foofoo" ) |
562 | /// .map(|mat| mat.pattern()) |
563 | /// .collect(); |
564 | /// assert_eq!(vec![ |
565 | /// PatternID::must(0), |
566 | /// PatternID::must(1), |
567 | /// PatternID::must(1), |
568 | /// PatternID::must(1), |
569 | /// ], matches); |
570 | /// # Some(()) } |
571 | /// # if cfg!(all(feature = "std" , any( |
572 | /// # target_arch = "x86_64" , target_arch = "aarch64" , |
573 | /// # ))) { |
574 | /// # example().unwrap() |
575 | /// # } else { |
576 | /// # assert!(example().is_none()); |
577 | /// # } |
578 | /// ``` |
579 | #[inline ] |
580 | pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>( |
581 | &'a self, |
582 | haystack: &'b B, |
583 | ) -> FindIter<'a, 'b> { |
584 | let haystack = haystack.as_ref(); |
585 | let span = Span::from(0..haystack.len()); |
586 | FindIter { searcher: self, haystack, span } |
587 | } |
588 | |
589 | /// Returns the match kind used by this packed searcher. |
590 | /// |
591 | /// # Examples |
592 | /// |
593 | /// Basic usage: |
594 | /// |
595 | /// ``` |
596 | /// use aho_corasick::packed::{MatchKind, Searcher}; |
597 | /// |
598 | /// # fn example() -> Option<()> { |
599 | /// let searcher = Searcher::new(["foobar" , "foo" ].iter().cloned())?; |
600 | /// // leftmost-first is the default. |
601 | /// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind()); |
602 | /// # Some(()) } |
603 | /// # if cfg!(all(feature = "std" , any( |
604 | /// # target_arch = "x86_64" , target_arch = "aarch64" , |
605 | /// # ))) { |
606 | /// # example().unwrap() |
607 | /// # } else { |
608 | /// # assert!(example().is_none()); |
609 | /// # } |
610 | /// ``` |
611 | #[inline ] |
612 | pub fn match_kind(&self) -> &MatchKind { |
613 | self.patterns.match_kind() |
614 | } |
615 | |
616 | /// Returns the minimum length of a haystack that is required in order for |
617 | /// packed searching to be effective. |
618 | /// |
619 | /// In some cases, the underlying packed searcher may not be able to search |
620 | /// very short haystacks. When that occurs, the implementation will defer |
621 | /// to a slower non-packed searcher (which is still generally faster than |
622 | /// Aho-Corasick for a small number of patterns). However, callers may |
623 | /// want to avoid ever using the slower variant, which one can do by |
624 | /// never passing a haystack shorter than the minimum length returned by |
625 | /// this method. |
626 | #[inline ] |
627 | pub fn minimum_len(&self) -> usize { |
628 | self.minimum_len |
629 | } |
630 | |
631 | /// Returns the approximate total amount of heap used by this searcher, in |
632 | /// units of bytes. |
633 | #[inline ] |
634 | pub fn memory_usage(&self) -> usize { |
635 | self.patterns.memory_usage() |
636 | + self.rabinkarp.memory_usage() |
637 | + self.search_kind.memory_usage() |
638 | } |
639 | |
640 | /// Use a slow (non-packed) searcher. |
641 | /// |
642 | /// This is useful when a packed searcher could be constructed, but could |
643 | /// not be used to search a specific haystack. For example, if Teddy was |
644 | /// built but the haystack is smaller than ~34 bytes, then Teddy might not |
645 | /// be able to run. |
646 | fn find_in_slow(&self, haystack: &[u8], span: Span) -> Option<Match> { |
647 | self.rabinkarp.find_at(&haystack[..span.end], span.start) |
648 | } |
649 | } |
650 | |
651 | impl SearchKind { |
652 | fn memory_usage(&self) -> usize { |
653 | match *self { |
654 | SearchKind::Teddy(ref ted) => ted.memory_usage(), |
655 | SearchKind::RabinKarp => 0, |
656 | } |
657 | } |
658 | } |
659 | |
660 | /// An iterator over non-overlapping matches from a packed searcher. |
661 | /// |
662 | /// The lifetime `'s` refers to the lifetime of the underlying [`Searcher`], |
663 | /// while the lifetime `'h` refers to the lifetime of the haystack being |
664 | /// searched. |
665 | #[derive(Debug)] |
666 | pub struct FindIter<'s, 'h> { |
667 | searcher: &'s Searcher, |
668 | haystack: &'h [u8], |
669 | span: Span, |
670 | } |
671 | |
672 | impl<'s, 'h> Iterator for FindIter<'s, 'h> { |
673 | type Item = Match; |
674 | |
675 | fn next(&mut self) -> Option<Match> { |
676 | if self.span.start > self.span.end { |
677 | return None; |
678 | } |
679 | match self.searcher.find_in(&self.haystack, self.span) { |
680 | None => None, |
681 | Some(m) => { |
682 | self.span.start = m.end(); |
683 | Some(m) |
684 | } |
685 | } |
686 | } |
687 | } |
688 | |