| 1 | use core::ops::{Range, RangeBounds}; |
| 2 | |
| 3 | use crate::util::primitives::PatternID; |
| 4 | |
| 5 | /// The configuration and the haystack to use for an Aho-Corasick search. |
| 6 | /// |
| 7 | /// When executing a search, there are a few parameters one might want to |
| 8 | /// configure: |
| 9 | /// |
| 10 | /// * The haystack to search, provided to the [`Input::new`] constructor. This |
| 11 | /// is the only required parameter. |
| 12 | /// * The span _within_ the haystack to limit a search to. (The default |
| 13 | /// is the entire haystack.) This is configured via [`Input::span`] or |
| 14 | /// [`Input::range`]. |
| 15 | /// * Whether to run an unanchored (matches can occur anywhere after the |
| 16 | /// start of the search) or anchored (matches can only occur beginning at |
| 17 | /// the start of the search) search. Unanchored search is the default. This is |
| 18 | /// configured via [`Input::anchored`]. |
| 19 | /// * Whether to quit the search as soon as a match has been found, regardless |
| 20 | /// of the [`MatchKind`] that the searcher was built with. This is configured |
| 21 | /// via [`Input::earliest`]. |
| 22 | /// |
| 23 | /// For most cases, the defaults for all optional parameters are appropriate. |
| 24 | /// The utility of this type is that it keeps the default or common case simple |
| 25 | /// while permitting tweaking parameters in more niche use cases while reusing |
| 26 | /// the same search APIs. |
| 27 | /// |
| 28 | /// # Valid bounds and search termination |
| 29 | /// |
| 30 | /// An `Input` permits setting the bounds of a search via either |
| 31 | /// [`Input::span`] or [`Input::range`]. The bounds set must be valid, or |
| 32 | /// else a panic will occur. Bounds are valid if and only if: |
| 33 | /// |
| 34 | /// * The bounds represent a valid range into the input's haystack. |
| 35 | /// * **or** the end bound is a valid ending bound for the haystack *and* |
| 36 | /// the start bound is exactly one greater than the end bound. |
| 37 | /// |
| 38 | /// In the latter case, [`Input::is_done`] will return true and indicates any |
| 39 | /// search receiving such an input should immediately return with no match. |
| 40 | /// |
| 41 | /// Other than representing "search is complete," the `Input::span` and |
| 42 | /// `Input::range` APIs are never necessary. Instead, callers can slice the |
| 43 | /// haystack instead, e.g., with `&haystack[start..end]`. With that said, they |
| 44 | /// can be more convenient than slicing because the match positions reported |
| 45 | /// when using `Input::span` or `Input::range` are in terms of the original |
| 46 | /// haystack. If you instead use `&haystack[start..end]`, then you'll need to |
| 47 | /// add `start` to any match position returned in order for it to be a correct |
| 48 | /// index into `haystack`. |
| 49 | /// |
| 50 | /// # Example: `&str` and `&[u8]` automatically convert to an `Input` |
| 51 | /// |
| 52 | /// There is a `From<&T> for Input` implementation for all `T: AsRef<[u8]>`. |
| 53 | /// Additionally, the [`AhoCorasick`](crate::AhoCorasick) search APIs accept |
| 54 | /// a `Into<Input>`. These two things combined together mean you can provide |
| 55 | /// things like `&str` and `&[u8]` to search APIs when the defaults are |
| 56 | /// suitable, but also an `Input` when they're not. For example: |
| 57 | /// |
| 58 | /// ``` |
| 59 | /// use aho_corasick::{AhoCorasick, Anchored, Input, Match, StartKind}; |
| 60 | /// |
| 61 | /// // Build a searcher that supports both unanchored and anchored modes. |
| 62 | /// let ac = AhoCorasick::builder() |
| 63 | /// .start_kind(StartKind::Both) |
| 64 | /// .build(&["abcd" , "b" ]) |
| 65 | /// .unwrap(); |
| 66 | /// let haystack = "abcd" ; |
| 67 | /// |
| 68 | /// // A search using default parameters is unanchored. With standard |
| 69 | /// // semantics, this finds `b` first. |
| 70 | /// assert_eq!( |
| 71 | /// Some(Match::must(1, 1..2)), |
| 72 | /// ac.find(haystack), |
| 73 | /// ); |
| 74 | /// // Using the same 'find' routine, we can provide an 'Input' explicitly |
| 75 | /// // that is configured to do an anchored search. Since 'b' doesn't start |
| 76 | /// // at the beginning of the search, it is not reported as a match. |
| 77 | /// assert_eq!( |
| 78 | /// Some(Match::must(0, 0..4)), |
| 79 | /// ac.find(Input::new(haystack).anchored(Anchored::Yes)), |
| 80 | /// ); |
| 81 | /// ``` |
| 82 | #[derive (Clone)] |
| 83 | pub struct Input<'h> { |
| 84 | haystack: &'h [u8], |
| 85 | span: Span, |
| 86 | anchored: Anchored, |
| 87 | earliest: bool, |
| 88 | } |
| 89 | |
| 90 | impl<'h> Input<'h> { |
| 91 | /// Create a new search configuration for the given haystack. |
| 92 | #[inline ] |
| 93 | pub fn new<H: ?Sized + AsRef<[u8]>>(haystack: &'h H) -> Input<'h> { |
| 94 | Input { |
| 95 | haystack: haystack.as_ref(), |
| 96 | span: Span { start: 0, end: haystack.as_ref().len() }, |
| 97 | anchored: Anchored::No, |
| 98 | earliest: false, |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | /// Set the span for this search. |
| 103 | /// |
| 104 | /// This routine is generic over how a span is provided. While |
| 105 | /// a [`Span`] may be given directly, one may also provide a |
| 106 | /// `std::ops::Range<usize>`. To provide anything supported by range |
| 107 | /// syntax, use the [`Input::range`] method. |
| 108 | /// |
| 109 | /// The default span is the entire haystack. |
| 110 | /// |
| 111 | /// Note that [`Input::range`] overrides this method and vice versa. |
| 112 | /// |
| 113 | /// # Panics |
| 114 | /// |
| 115 | /// This panics if the given span does not correspond to valid bounds in |
| 116 | /// the haystack or the termination of a search. |
| 117 | /// |
| 118 | /// # Example |
| 119 | /// |
| 120 | /// This example shows how the span of the search can impact whether a |
| 121 | /// match is reported or not. |
| 122 | /// |
| 123 | /// ``` |
| 124 | /// use aho_corasick::{AhoCorasick, Input, MatchKind}; |
| 125 | /// |
| 126 | /// let patterns = &["b" , "abcd" , "abc" ]; |
| 127 | /// let haystack = "abcd" ; |
| 128 | /// |
| 129 | /// let ac = AhoCorasick::builder() |
| 130 | /// .match_kind(MatchKind::LeftmostFirst) |
| 131 | /// .build(patterns) |
| 132 | /// .unwrap(); |
| 133 | /// let input = Input::new(haystack).span(0..3); |
| 134 | /// let mat = ac.try_find(input)?.expect("should have a match" ); |
| 135 | /// // Without the span stopping the search early, 'abcd' would be reported |
| 136 | /// // because it is the correct leftmost-first match. |
| 137 | /// assert_eq!("abc" , &haystack[mat.span()]); |
| 138 | /// |
| 139 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
| 140 | /// ``` |
| 141 | #[inline ] |
| 142 | pub fn span<S: Into<Span>>(mut self, span: S) -> Input<'h> { |
| 143 | self.set_span(span); |
| 144 | self |
| 145 | } |
| 146 | |
| 147 | /// Like `Input::span`, but accepts any range instead. |
| 148 | /// |
| 149 | /// The default range is the entire haystack. |
| 150 | /// |
| 151 | /// Note that [`Input::span`] overrides this method and vice versa. |
| 152 | /// |
| 153 | /// # Panics |
| 154 | /// |
| 155 | /// This routine will panic if the given range could not be converted |
| 156 | /// to a valid [`Range`]. For example, this would panic when given |
| 157 | /// `0..=usize::MAX` since it cannot be represented using a half-open |
| 158 | /// interval in terms of `usize`. |
| 159 | /// |
| 160 | /// This routine also panics if the given range does not correspond to |
| 161 | /// valid bounds in the haystack or the termination of a search. |
| 162 | /// |
| 163 | /// # Example |
| 164 | /// |
| 165 | /// ``` |
| 166 | /// use aho_corasick::Input; |
| 167 | /// |
| 168 | /// let input = Input::new("foobar" ); |
| 169 | /// assert_eq!(0..6, input.get_range()); |
| 170 | /// |
| 171 | /// let input = Input::new("foobar" ).range(2..=4); |
| 172 | /// assert_eq!(2..5, input.get_range()); |
| 173 | /// ``` |
| 174 | #[inline ] |
| 175 | pub fn range<R: RangeBounds<usize>>(mut self, range: R) -> Input<'h> { |
| 176 | self.set_range(range); |
| 177 | self |
| 178 | } |
| 179 | |
| 180 | /// Sets the anchor mode of a search. |
| 181 | /// |
| 182 | /// When a search is anchored (via [`Anchored::Yes`]), a match must begin |
| 183 | /// at the start of a search. When a search is not anchored (that's |
| 184 | /// [`Anchored::No`]), searchers will look for a match anywhere in the |
| 185 | /// haystack. |
| 186 | /// |
| 187 | /// By default, the anchored mode is [`Anchored::No`]. |
| 188 | /// |
| 189 | /// # Support for anchored searches |
| 190 | /// |
| 191 | /// Anchored or unanchored searches might not always be available, |
| 192 | /// depending on the type of searcher used and its configuration: |
| 193 | /// |
| 194 | /// * [`noncontiguous::NFA`](crate::nfa::noncontiguous::NFA) always |
| 195 | /// supports both unanchored and anchored searches. |
| 196 | /// * [`contiguous::NFA`](crate::nfa::contiguous::NFA) always supports both |
| 197 | /// unanchored and anchored searches. |
| 198 | /// * [`dfa::DFA`](crate::dfa::DFA) supports only unanchored |
| 199 | /// searches by default. |
| 200 | /// [`dfa::Builder::start_kind`](crate::dfa::Builder::start_kind) can |
| 201 | /// be used to change the default to supporting both kinds of searches |
| 202 | /// or even just anchored searches. |
| 203 | /// * [`AhoCorasick`](crate::AhoCorasick) inherits the same setup as a |
| 204 | /// `DFA`. Namely, it only supports unanchored searches by default, but |
| 205 | /// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind) |
| 206 | /// can change this. |
| 207 | /// |
| 208 | /// If you try to execute a search using a `try_` ("fallible") method with |
| 209 | /// an unsupported anchor mode, then an error will be returned. For calls |
| 210 | /// to infallible search methods, a panic will result. |
| 211 | /// |
| 212 | /// # Example |
| 213 | /// |
| 214 | /// This demonstrates the differences between an anchored search and |
| 215 | /// an unanchored search. Notice that we build our `AhoCorasick` searcher |
| 216 | /// with [`StartKind::Both`] so that it supports both unanchored and |
| 217 | /// anchored searches simultaneously. |
| 218 | /// |
| 219 | /// ``` |
| 220 | /// use aho_corasick::{ |
| 221 | /// AhoCorasick, Anchored, Input, MatchKind, StartKind, |
| 222 | /// }; |
| 223 | /// |
| 224 | /// let patterns = &["bcd" ]; |
| 225 | /// let haystack = "abcd" ; |
| 226 | /// |
| 227 | /// let ac = AhoCorasick::builder() |
| 228 | /// .start_kind(StartKind::Both) |
| 229 | /// .build(patterns) |
| 230 | /// .unwrap(); |
| 231 | /// |
| 232 | /// // Note that 'Anchored::No' is the default, so it doesn't need to |
| 233 | /// // be explicitly specified here. |
| 234 | /// let input = Input::new(haystack); |
| 235 | /// let mat = ac.try_find(input)?.expect("should have a match" ); |
| 236 | /// assert_eq!("bcd" , &haystack[mat.span()]); |
| 237 | /// |
| 238 | /// // While 'bcd' occurs in the haystack, it does not begin where our |
| 239 | /// // search begins, so no match is found. |
| 240 | /// let input = Input::new(haystack).anchored(Anchored::Yes); |
| 241 | /// assert_eq!(None, ac.try_find(input)?); |
| 242 | /// |
| 243 | /// // However, if we start our search where 'bcd' starts, then we will |
| 244 | /// // find a match. |
| 245 | /// let input = Input::new(haystack).range(1..).anchored(Anchored::Yes); |
| 246 | /// let mat = ac.try_find(input)?.expect("should have a match" ); |
| 247 | /// assert_eq!("bcd" , &haystack[mat.span()]); |
| 248 | /// |
| 249 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
| 250 | /// ``` |
| 251 | #[inline ] |
| 252 | pub fn anchored(mut self, mode: Anchored) -> Input<'h> { |
| 253 | self.set_anchored(mode); |
| 254 | self |
| 255 | } |
| 256 | |
| 257 | /// Whether to execute an "earliest" search or not. |
| 258 | /// |
| 259 | /// When running a non-overlapping search, an "earliest" search will |
| 260 | /// return the match location as early as possible. For example, given |
| 261 | /// the patterns `abc` and `b`, and a haystack of `abc`, a normal |
| 262 | /// leftmost-first search will return `abc` as a match. But an "earliest" |
| 263 | /// search will return as soon as it is known that a match occurs, which |
| 264 | /// happens once `b` is seen. |
| 265 | /// |
| 266 | /// Note that when using [`MatchKind::Standard`], the "earliest" option |
| 267 | /// has no effect since standard semantics are already "earliest." Note |
| 268 | /// also that this has no effect in overlapping searches, since overlapping |
| 269 | /// searches also use standard semantics and report all possible matches. |
| 270 | /// |
| 271 | /// This is disabled by default. |
| 272 | /// |
| 273 | /// # Example |
| 274 | /// |
| 275 | /// This example shows the difference between "earliest" searching and |
| 276 | /// normal leftmost searching. |
| 277 | /// |
| 278 | /// ``` |
| 279 | /// use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind}; |
| 280 | /// |
| 281 | /// let patterns = &["abc" , "b" ]; |
| 282 | /// let haystack = "abc" ; |
| 283 | /// |
| 284 | /// let ac = AhoCorasick::builder() |
| 285 | /// .match_kind(MatchKind::LeftmostFirst) |
| 286 | /// .build(patterns) |
| 287 | /// .unwrap(); |
| 288 | /// |
| 289 | /// // The normal leftmost-first match. |
| 290 | /// let input = Input::new(haystack); |
| 291 | /// let mat = ac.try_find(input)?.expect("should have a match" ); |
| 292 | /// assert_eq!("abc" , &haystack[mat.span()]); |
| 293 | /// |
| 294 | /// // The "earliest" possible match, even if it isn't leftmost-first. |
| 295 | /// let input = Input::new(haystack).earliest(true); |
| 296 | /// let mat = ac.try_find(input)?.expect("should have a match" ); |
| 297 | /// assert_eq!("b" , &haystack[mat.span()]); |
| 298 | /// |
| 299 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
| 300 | /// ``` |
| 301 | #[inline ] |
| 302 | pub fn earliest(mut self, yes: bool) -> Input<'h> { |
| 303 | self.set_earliest(yes); |
| 304 | self |
| 305 | } |
| 306 | |
| 307 | /// Set the span for this search configuration. |
| 308 | /// |
| 309 | /// This is like the [`Input::span`] method, except this mutates the |
| 310 | /// span in place. |
| 311 | /// |
| 312 | /// This routine is generic over how a span is provided. While |
| 313 | /// a [`Span`] may be given directly, one may also provide a |
| 314 | /// `std::ops::Range<usize>`. |
| 315 | /// |
| 316 | /// # Panics |
| 317 | /// |
| 318 | /// This panics if the given span does not correspond to valid bounds in |
| 319 | /// the haystack or the termination of a search. |
| 320 | /// |
| 321 | /// # Example |
| 322 | /// |
| 323 | /// ``` |
| 324 | /// use aho_corasick::Input; |
| 325 | /// |
| 326 | /// let mut input = Input::new("foobar" ); |
| 327 | /// assert_eq!(0..6, input.get_range()); |
| 328 | /// input.set_span(2..4); |
| 329 | /// assert_eq!(2..4, input.get_range()); |
| 330 | /// ``` |
| 331 | #[inline ] |
| 332 | pub fn set_span<S: Into<Span>>(&mut self, span: S) { |
| 333 | let span = span.into(); |
| 334 | assert!( |
| 335 | span.end <= self.haystack.len() |
| 336 | && span.start <= span.end.wrapping_add(1), |
| 337 | "invalid span {:?} for haystack of length {}" , |
| 338 | span, |
| 339 | self.haystack.len(), |
| 340 | ); |
| 341 | self.span = span; |
| 342 | } |
| 343 | |
| 344 | /// Set the span for this search configuration given any range. |
| 345 | /// |
| 346 | /// This is like the [`Input::range`] method, except this mutates the |
| 347 | /// span in place. |
| 348 | /// |
| 349 | /// # Panics |
| 350 | /// |
| 351 | /// This routine will panic if the given range could not be converted |
| 352 | /// to a valid [`Range`]. For example, this would panic when given |
| 353 | /// `0..=usize::MAX` since it cannot be represented using a half-open |
| 354 | /// interval in terms of `usize`. |
| 355 | /// |
| 356 | /// This routine also panics if the given range does not correspond to |
| 357 | /// valid bounds in the haystack or the termination of a search. |
| 358 | /// |
| 359 | /// # Example |
| 360 | /// |
| 361 | /// ``` |
| 362 | /// use aho_corasick::Input; |
| 363 | /// |
| 364 | /// let mut input = Input::new("foobar" ); |
| 365 | /// assert_eq!(0..6, input.get_range()); |
| 366 | /// input.set_range(2..=4); |
| 367 | /// assert_eq!(2..5, input.get_range()); |
| 368 | /// ``` |
| 369 | #[inline ] |
| 370 | pub fn set_range<R: RangeBounds<usize>>(&mut self, range: R) { |
| 371 | use core::ops::Bound; |
| 372 | |
| 373 | // It's a little weird to convert ranges into spans, and then spans |
| 374 | // back into ranges when we actually slice the haystack. Because |
| 375 | // of that process, we always represent everything as a half-open |
| 376 | // internal. Therefore, handling things like m..=n is a little awkward. |
| 377 | let start = match range.start_bound() { |
| 378 | Bound::Included(&i) => i, |
| 379 | // Can this case ever happen? Range syntax doesn't support it... |
| 380 | Bound::Excluded(&i) => i.checked_add(1).unwrap(), |
| 381 | Bound::Unbounded => 0, |
| 382 | }; |
| 383 | let end = match range.end_bound() { |
| 384 | Bound::Included(&i) => i.checked_add(1).unwrap(), |
| 385 | Bound::Excluded(&i) => i, |
| 386 | Bound::Unbounded => self.haystack().len(), |
| 387 | }; |
| 388 | self.set_span(Span { start, end }); |
| 389 | } |
| 390 | |
| 391 | /// Set the starting offset for the span for this search configuration. |
| 392 | /// |
| 393 | /// This is a convenience routine for only mutating the start of a span |
| 394 | /// without having to set the entire span. |
| 395 | /// |
| 396 | /// # Panics |
| 397 | /// |
| 398 | /// This panics if the given span does not correspond to valid bounds in |
| 399 | /// the haystack or the termination of a search. |
| 400 | /// |
| 401 | /// # Example |
| 402 | /// |
| 403 | /// ``` |
| 404 | /// use aho_corasick::Input; |
| 405 | /// |
| 406 | /// let mut input = Input::new("foobar" ); |
| 407 | /// assert_eq!(0..6, input.get_range()); |
| 408 | /// input.set_start(5); |
| 409 | /// assert_eq!(5..6, input.get_range()); |
| 410 | /// ``` |
| 411 | #[inline ] |
| 412 | pub fn set_start(&mut self, start: usize) { |
| 413 | self.set_span(Span { start, ..self.get_span() }); |
| 414 | } |
| 415 | |
| 416 | /// Set the ending offset for the span for this search configuration. |
| 417 | /// |
| 418 | /// This is a convenience routine for only mutating the end of a span |
| 419 | /// without having to set the entire span. |
| 420 | /// |
| 421 | /// # Panics |
| 422 | /// |
| 423 | /// This panics if the given span does not correspond to valid bounds in |
| 424 | /// the haystack or the termination of a search. |
| 425 | /// |
| 426 | /// # Example |
| 427 | /// |
| 428 | /// ``` |
| 429 | /// use aho_corasick::Input; |
| 430 | /// |
| 431 | /// let mut input = Input::new("foobar" ); |
| 432 | /// assert_eq!(0..6, input.get_range()); |
| 433 | /// input.set_end(5); |
| 434 | /// assert_eq!(0..5, input.get_range()); |
| 435 | /// ``` |
| 436 | #[inline ] |
| 437 | pub fn set_end(&mut self, end: usize) { |
| 438 | self.set_span(Span { end, ..self.get_span() }); |
| 439 | } |
| 440 | |
| 441 | /// Set the anchor mode of a search. |
| 442 | /// |
| 443 | /// This is like [`Input::anchored`], except it mutates the search |
| 444 | /// configuration in place. |
| 445 | /// |
| 446 | /// # Example |
| 447 | /// |
| 448 | /// ``` |
| 449 | /// use aho_corasick::{Anchored, Input}; |
| 450 | /// |
| 451 | /// let mut input = Input::new("foobar" ); |
| 452 | /// assert_eq!(Anchored::No, input.get_anchored()); |
| 453 | /// |
| 454 | /// input.set_anchored(Anchored::Yes); |
| 455 | /// assert_eq!(Anchored::Yes, input.get_anchored()); |
| 456 | /// ``` |
| 457 | #[inline ] |
| 458 | pub fn set_anchored(&mut self, mode: Anchored) { |
| 459 | self.anchored = mode; |
| 460 | } |
| 461 | |
| 462 | /// Set whether the search should execute in "earliest" mode or not. |
| 463 | /// |
| 464 | /// This is like [`Input::earliest`], except it mutates the search |
| 465 | /// configuration in place. |
| 466 | /// |
| 467 | /// # Example |
| 468 | /// |
| 469 | /// ``` |
| 470 | /// use aho_corasick::Input; |
| 471 | /// |
| 472 | /// let mut input = Input::new("foobar" ); |
| 473 | /// assert!(!input.get_earliest()); |
| 474 | /// input.set_earliest(true); |
| 475 | /// assert!(input.get_earliest()); |
| 476 | /// ``` |
| 477 | #[inline ] |
| 478 | pub fn set_earliest(&mut self, yes: bool) { |
| 479 | self.earliest = yes; |
| 480 | } |
| 481 | |
| 482 | /// Return a borrow of the underlying haystack as a slice of bytes. |
| 483 | /// |
| 484 | /// # Example |
| 485 | /// |
| 486 | /// ``` |
| 487 | /// use aho_corasick::Input; |
| 488 | /// |
| 489 | /// let input = Input::new("foobar" ); |
| 490 | /// assert_eq!(b"foobar" , input.haystack()); |
| 491 | /// ``` |
| 492 | #[inline ] |
| 493 | pub fn haystack(&self) -> &[u8] { |
| 494 | self.haystack |
| 495 | } |
| 496 | |
| 497 | /// Return the start position of this search. |
| 498 | /// |
| 499 | /// This is a convenience routine for `search.get_span().start()`. |
| 500 | /// |
| 501 | /// # Example |
| 502 | /// |
| 503 | /// ``` |
| 504 | /// use aho_corasick::Input; |
| 505 | /// |
| 506 | /// let input = Input::new("foobar" ); |
| 507 | /// assert_eq!(0, input.start()); |
| 508 | /// |
| 509 | /// let input = Input::new("foobar" ).span(2..4); |
| 510 | /// assert_eq!(2, input.start()); |
| 511 | /// ``` |
| 512 | #[inline ] |
| 513 | pub fn start(&self) -> usize { |
| 514 | self.get_span().start |
| 515 | } |
| 516 | |
| 517 | /// Return the end position of this search. |
| 518 | /// |
| 519 | /// This is a convenience routine for `search.get_span().end()`. |
| 520 | /// |
| 521 | /// # Example |
| 522 | /// |
| 523 | /// ``` |
| 524 | /// use aho_corasick::Input; |
| 525 | /// |
| 526 | /// let input = Input::new("foobar" ); |
| 527 | /// assert_eq!(6, input.end()); |
| 528 | /// |
| 529 | /// let input = Input::new("foobar" ).span(2..4); |
| 530 | /// assert_eq!(4, input.end()); |
| 531 | /// ``` |
| 532 | #[inline ] |
| 533 | pub fn end(&self) -> usize { |
| 534 | self.get_span().end |
| 535 | } |
| 536 | |
| 537 | /// Return the span for this search configuration. |
| 538 | /// |
| 539 | /// If one was not explicitly set, then the span corresponds to the entire |
| 540 | /// range of the haystack. |
| 541 | /// |
| 542 | /// # Example |
| 543 | /// |
| 544 | /// ``` |
| 545 | /// use aho_corasick::{Input, Span}; |
| 546 | /// |
| 547 | /// let input = Input::new("foobar" ); |
| 548 | /// assert_eq!(Span { start: 0, end: 6 }, input.get_span()); |
| 549 | /// ``` |
| 550 | #[inline ] |
| 551 | pub fn get_span(&self) -> Span { |
| 552 | self.span |
| 553 | } |
| 554 | |
| 555 | /// Return the span as a range for this search configuration. |
| 556 | /// |
| 557 | /// If one was not explicitly set, then the span corresponds to the entire |
| 558 | /// range of the haystack. |
| 559 | /// |
| 560 | /// # Example |
| 561 | /// |
| 562 | /// ``` |
| 563 | /// use aho_corasick::Input; |
| 564 | /// |
| 565 | /// let input = Input::new("foobar" ); |
| 566 | /// assert_eq!(0..6, input.get_range()); |
| 567 | /// ``` |
| 568 | #[inline ] |
| 569 | pub fn get_range(&self) -> Range<usize> { |
| 570 | self.get_span().range() |
| 571 | } |
| 572 | |
| 573 | /// Return the anchored mode for this search configuration. |
| 574 | /// |
| 575 | /// If no anchored mode was set, then it defaults to [`Anchored::No`]. |
| 576 | /// |
| 577 | /// # Example |
| 578 | /// |
| 579 | /// ``` |
| 580 | /// use aho_corasick::{Anchored, Input}; |
| 581 | /// |
| 582 | /// let mut input = Input::new("foobar" ); |
| 583 | /// assert_eq!(Anchored::No, input.get_anchored()); |
| 584 | /// |
| 585 | /// input.set_anchored(Anchored::Yes); |
| 586 | /// assert_eq!(Anchored::Yes, input.get_anchored()); |
| 587 | /// ``` |
| 588 | #[inline ] |
| 589 | pub fn get_anchored(&self) -> Anchored { |
| 590 | self.anchored |
| 591 | } |
| 592 | |
| 593 | /// Return whether this search should execute in "earliest" mode. |
| 594 | /// |
| 595 | /// # Example |
| 596 | /// |
| 597 | /// ``` |
| 598 | /// use aho_corasick::Input; |
| 599 | /// |
| 600 | /// let input = Input::new("foobar" ); |
| 601 | /// assert!(!input.get_earliest()); |
| 602 | /// ``` |
| 603 | #[inline ] |
| 604 | pub fn get_earliest(&self) -> bool { |
| 605 | self.earliest |
| 606 | } |
| 607 | |
| 608 | /// Return true if this input has been exhausted, which in turn means all |
| 609 | /// subsequent searches will return no matches. |
| 610 | /// |
| 611 | /// This occurs precisely when the start position of this search is greater |
| 612 | /// than the end position of the search. |
| 613 | /// |
| 614 | /// # Example |
| 615 | /// |
| 616 | /// ``` |
| 617 | /// use aho_corasick::Input; |
| 618 | /// |
| 619 | /// let mut input = Input::new("foobar" ); |
| 620 | /// assert!(!input.is_done()); |
| 621 | /// input.set_start(6); |
| 622 | /// assert!(!input.is_done()); |
| 623 | /// input.set_start(7); |
| 624 | /// assert!(input.is_done()); |
| 625 | /// ``` |
| 626 | #[inline ] |
| 627 | pub fn is_done(&self) -> bool { |
| 628 | self.get_span().start > self.get_span().end |
| 629 | } |
| 630 | } |
| 631 | |
| 632 | impl<'h> core::fmt::Debug for Input<'h> { |
| 633 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| 634 | let mut fmter: DebugStruct<'_, '_> = f.debug_struct(name:"Input" ); |
| 635 | match&mut DebugStruct<'_, '_> core::str::from_utf8(self.haystack()) { |
| 636 | Ok(nice) => fmter.field("haystack" , &nice), |
| 637 | Err(_) => fmter.field("haystack" , &self.haystack()), |
| 638 | } |
| 639 | .field("span" , &self.span) |
| 640 | .field("anchored" , &self.anchored) |
| 641 | .field(name:"earliest" , &self.earliest) |
| 642 | .finish() |
| 643 | } |
| 644 | } |
| 645 | |
| 646 | impl<'h, H: ?Sized + AsRef<[u8]>> From<&'h H> for Input<'h> { |
| 647 | #[inline ] |
| 648 | fn from(haystack: &'h H) -> Input<'h> { |
| 649 | Input::new(haystack) |
| 650 | } |
| 651 | } |
| 652 | |
| 653 | /// A representation of a range in a haystack. |
| 654 | /// |
| 655 | /// A span corresponds to the starting and ending _byte offsets_ of a |
| 656 | /// contiguous region of bytes. The starting offset is inclusive while the |
| 657 | /// ending offset is exclusive. That is, a span is a half-open interval. |
| 658 | /// |
| 659 | /// A span is used to report the offsets of a match, but it is also used to |
| 660 | /// convey which region of a haystack should be searched via routines like |
| 661 | /// [`Input::span`]. |
| 662 | /// |
| 663 | /// This is basically equivalent to a `std::ops::Range<usize>`, except this |
| 664 | /// type implements `Copy` which makes it more ergonomic to use in the context |
| 665 | /// of this crate. Indeed, `Span` exists only because `Range<usize>` does |
| 666 | /// not implement `Copy`. Like a range, this implements `Index` for `[u8]` |
| 667 | /// and `str`, and `IndexMut` for `[u8]`. For convenience, this also impls |
| 668 | /// `From<Range>`, which means things like `Span::from(5..10)` work. |
| 669 | /// |
| 670 | /// There are no constraints on the values of a span. It is, for example, legal |
| 671 | /// to create a span where `start > end`. |
| 672 | #[derive (Clone, Copy, Eq, Hash, PartialEq)] |
| 673 | pub struct Span { |
| 674 | /// The start offset of the span, inclusive. |
| 675 | pub start: usize, |
| 676 | /// The end offset of the span, exclusive. |
| 677 | pub end: usize, |
| 678 | } |
| 679 | |
| 680 | impl Span { |
| 681 | /// Returns this span as a range. |
| 682 | #[inline ] |
| 683 | pub fn range(&self) -> Range<usize> { |
| 684 | Range::from(*self) |
| 685 | } |
| 686 | |
| 687 | /// Returns true when this span is empty. That is, when `start >= end`. |
| 688 | #[inline ] |
| 689 | pub fn is_empty(&self) -> bool { |
| 690 | self.start >= self.end |
| 691 | } |
| 692 | |
| 693 | /// Returns the length of this span. |
| 694 | /// |
| 695 | /// This returns `0` in precisely the cases that `is_empty` returns `true`. |
| 696 | #[inline ] |
| 697 | pub fn len(&self) -> usize { |
| 698 | self.end.saturating_sub(self.start) |
| 699 | } |
| 700 | |
| 701 | /// Returns true when the given offset is contained within this span. |
| 702 | /// |
| 703 | /// Note that an empty span contains no offsets and will always return |
| 704 | /// false. |
| 705 | #[inline ] |
| 706 | pub fn contains(&self, offset: usize) -> bool { |
| 707 | !self.is_empty() && self.start <= offset && offset <= self.end |
| 708 | } |
| 709 | |
| 710 | /// Returns a new span with `offset` added to this span's `start` and `end` |
| 711 | /// values. |
| 712 | #[inline ] |
| 713 | pub fn offset(&self, offset: usize) -> Span { |
| 714 | Span { start: self.start + offset, end: self.end + offset } |
| 715 | } |
| 716 | } |
| 717 | |
| 718 | impl core::fmt::Debug for Span { |
| 719 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| 720 | write!(f, " {}.. {}" , self.start, self.end) |
| 721 | } |
| 722 | } |
| 723 | |
| 724 | impl core::ops::Index<Span> for [u8] { |
| 725 | type Output = [u8]; |
| 726 | |
| 727 | #[inline ] |
| 728 | fn index(&self, index: Span) -> &[u8] { |
| 729 | &self[index.range()] |
| 730 | } |
| 731 | } |
| 732 | |
| 733 | impl core::ops::IndexMut<Span> for [u8] { |
| 734 | #[inline ] |
| 735 | fn index_mut(&mut self, index: Span) -> &mut [u8] { |
| 736 | &mut self[index.range()] |
| 737 | } |
| 738 | } |
| 739 | |
| 740 | impl core::ops::Index<Span> for str { |
| 741 | type Output = str; |
| 742 | |
| 743 | #[inline ] |
| 744 | fn index(&self, index: Span) -> &str { |
| 745 | &self[index.range()] |
| 746 | } |
| 747 | } |
| 748 | |
| 749 | impl From<Range<usize>> for Span { |
| 750 | #[inline ] |
| 751 | fn from(range: Range<usize>) -> Span { |
| 752 | Span { start: range.start, end: range.end } |
| 753 | } |
| 754 | } |
| 755 | |
| 756 | impl From<Span> for Range<usize> { |
| 757 | #[inline ] |
| 758 | fn from(span: Span) -> Range<usize> { |
| 759 | Range { start: span.start, end: span.end } |
| 760 | } |
| 761 | } |
| 762 | |
| 763 | impl PartialEq<Range<usize>> for Span { |
| 764 | #[inline ] |
| 765 | fn eq(&self, range: &Range<usize>) -> bool { |
| 766 | self.start == range.start && self.end == range.end |
| 767 | } |
| 768 | } |
| 769 | |
| 770 | impl PartialEq<Span> for Range<usize> { |
| 771 | #[inline ] |
| 772 | fn eq(&self, span: &Span) -> bool { |
| 773 | self.start == span.start && self.end == span.end |
| 774 | } |
| 775 | } |
| 776 | |
| 777 | /// The type of anchored search to perform. |
| 778 | /// |
| 779 | /// If an Aho-Corasick searcher does not support the anchored mode selected, |
| 780 | /// then the search will return an error or panic, depending on whether a |
| 781 | /// fallible or an infallible routine was called. |
| 782 | #[non_exhaustive ] |
| 783 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
| 784 | pub enum Anchored { |
| 785 | /// Run an unanchored search. This means a match may occur anywhere at or |
| 786 | /// after the start position of the search up until the end position of the |
| 787 | /// search. |
| 788 | No, |
| 789 | /// Run an anchored search. This means that a match must begin at the start |
| 790 | /// position of the search and end before the end position of the search. |
| 791 | Yes, |
| 792 | } |
| 793 | |
| 794 | impl Anchored { |
| 795 | /// Returns true if and only if this anchor mode corresponds to an anchored |
| 796 | /// search. |
| 797 | /// |
| 798 | /// # Example |
| 799 | /// |
| 800 | /// ``` |
| 801 | /// use aho_corasick::Anchored; |
| 802 | /// |
| 803 | /// assert!(!Anchored::No.is_anchored()); |
| 804 | /// assert!(Anchored::Yes.is_anchored()); |
| 805 | /// ``` |
| 806 | #[inline ] |
| 807 | pub fn is_anchored(&self) -> bool { |
| 808 | matches!(*self, Anchored::Yes) |
| 809 | } |
| 810 | } |
| 811 | |
| 812 | /// A representation of a match reported by an Aho-Corasick searcher. |
| 813 | /// |
| 814 | /// A match has two essential pieces of information: the [`PatternID`] that |
| 815 | /// matches, and the [`Span`] of the match in a haystack. |
| 816 | /// |
| 817 | /// The pattern is identified by an ID, which corresponds to its position |
| 818 | /// (starting from `0`) relative to other patterns used to construct the |
| 819 | /// corresponding searcher. If only a single pattern is provided, then all |
| 820 | /// matches are guaranteed to have a pattern ID of `0`. |
| 821 | /// |
| 822 | /// Every match reported by a searcher guarantees that its span has its start |
| 823 | /// offset as less than or equal to its end offset. |
| 824 | #[derive (Clone, Copy, Debug, Eq, Hash, PartialEq)] |
| 825 | pub struct Match { |
| 826 | /// The pattern ID. |
| 827 | pattern: PatternID, |
| 828 | /// The underlying match span. |
| 829 | span: Span, |
| 830 | } |
| 831 | |
| 832 | impl Match { |
| 833 | /// Create a new match from a pattern ID and a span. |
| 834 | /// |
| 835 | /// This constructor is generic over how a span is provided. While |
| 836 | /// a [`Span`] may be given directly, one may also provide a |
| 837 | /// `std::ops::Range<usize>`. |
| 838 | /// |
| 839 | /// # Panics |
| 840 | /// |
| 841 | /// This panics if `end < start`. |
| 842 | /// |
| 843 | /// # Example |
| 844 | /// |
| 845 | /// This shows how to create a match for the first pattern in an |
| 846 | /// Aho-Corasick searcher using convenient range syntax. |
| 847 | /// |
| 848 | /// ``` |
| 849 | /// use aho_corasick::{Match, PatternID}; |
| 850 | /// |
| 851 | /// let m = Match::new(PatternID::ZERO, 5..10); |
| 852 | /// assert_eq!(0, m.pattern().as_usize()); |
| 853 | /// assert_eq!(5, m.start()); |
| 854 | /// assert_eq!(10, m.end()); |
| 855 | /// ``` |
| 856 | #[inline ] |
| 857 | pub fn new<S: Into<Span>>(pattern: PatternID, span: S) -> Match { |
| 858 | let span = span.into(); |
| 859 | assert!(span.start <= span.end, "invalid match span" ); |
| 860 | Match { pattern, span } |
| 861 | } |
| 862 | |
| 863 | /// Create a new match from a pattern ID and a byte offset span. |
| 864 | /// |
| 865 | /// This constructor is generic over how a span is provided. While |
| 866 | /// a [`Span`] may be given directly, one may also provide a |
| 867 | /// `std::ops::Range<usize>`. |
| 868 | /// |
| 869 | /// This is like [`Match::new`], but accepts a `usize` instead of a |
| 870 | /// [`PatternID`]. This panics if the given `usize` is not representable |
| 871 | /// as a `PatternID`. |
| 872 | /// |
| 873 | /// # Panics |
| 874 | /// |
| 875 | /// This panics if `end < start` or if `pattern > PatternID::MAX`. |
| 876 | /// |
| 877 | /// # Example |
| 878 | /// |
| 879 | /// This shows how to create a match for the third pattern in an |
| 880 | /// Aho-Corasick searcher using convenient range syntax. |
| 881 | /// |
| 882 | /// ``` |
| 883 | /// use aho_corasick::Match; |
| 884 | /// |
| 885 | /// let m = Match::must(3, 5..10); |
| 886 | /// assert_eq!(3, m.pattern().as_usize()); |
| 887 | /// assert_eq!(5, m.start()); |
| 888 | /// assert_eq!(10, m.end()); |
| 889 | /// ``` |
| 890 | #[inline ] |
| 891 | pub fn must<S: Into<Span>>(pattern: usize, span: S) -> Match { |
| 892 | Match::new(PatternID::must(pattern), span) |
| 893 | } |
| 894 | |
| 895 | /// Returns the ID of the pattern that matched. |
| 896 | /// |
| 897 | /// The ID of a pattern is derived from the position in which it was |
| 898 | /// originally inserted into the corresponding searcher. The first pattern |
| 899 | /// has identifier `0`, and each subsequent pattern is `1`, `2` and so on. |
| 900 | #[inline ] |
| 901 | pub fn pattern(&self) -> PatternID { |
| 902 | self.pattern |
| 903 | } |
| 904 | |
| 905 | /// The starting position of the match. |
| 906 | /// |
| 907 | /// This is a convenience routine for `Match::span().start`. |
| 908 | #[inline ] |
| 909 | pub fn start(&self) -> usize { |
| 910 | self.span().start |
| 911 | } |
| 912 | |
| 913 | /// The ending position of the match. |
| 914 | /// |
| 915 | /// This is a convenience routine for `Match::span().end`. |
| 916 | #[inline ] |
| 917 | pub fn end(&self) -> usize { |
| 918 | self.span().end |
| 919 | } |
| 920 | |
| 921 | /// Returns the match span as a range. |
| 922 | /// |
| 923 | /// This is a convenience routine for `Match::span().range()`. |
| 924 | #[inline ] |
| 925 | pub fn range(&self) -> core::ops::Range<usize> { |
| 926 | self.span().range() |
| 927 | } |
| 928 | |
| 929 | /// Returns the span for this match. |
| 930 | #[inline ] |
| 931 | pub fn span(&self) -> Span { |
| 932 | self.span |
| 933 | } |
| 934 | |
| 935 | /// Returns true when the span in this match is empty. |
| 936 | /// |
| 937 | /// An empty match can only be returned when empty pattern is in the |
| 938 | /// Aho-Corasick searcher. |
| 939 | #[inline ] |
| 940 | pub fn is_empty(&self) -> bool { |
| 941 | self.span().is_empty() |
| 942 | } |
| 943 | |
| 944 | /// Returns the length of this match. |
| 945 | /// |
| 946 | /// This returns `0` in precisely the cases that `is_empty` returns `true`. |
| 947 | #[inline ] |
| 948 | pub fn len(&self) -> usize { |
| 949 | self.span().len() |
| 950 | } |
| 951 | |
| 952 | /// Returns a new match with `offset` added to its span's `start` and `end` |
| 953 | /// values. |
| 954 | #[inline ] |
| 955 | pub fn offset(&self, offset: usize) -> Match { |
| 956 | Match { |
| 957 | pattern: self.pattern, |
| 958 | span: Span { |
| 959 | start: self.start() + offset, |
| 960 | end: self.end() + offset, |
| 961 | }, |
| 962 | } |
| 963 | } |
| 964 | } |
| 965 | |
| 966 | /// A knob for controlling the match semantics of an Aho-Corasick automaton. |
| 967 | /// |
| 968 | /// There are two generally different ways that Aho-Corasick automatons can |
| 969 | /// report matches. The first way is the "standard" approach that results from |
| 970 | /// implementing most textbook explanations of Aho-Corasick. The second way is |
| 971 | /// to report only the leftmost non-overlapping matches. The leftmost approach |
| 972 | /// is in turn split into two different ways of resolving ambiguous matches: |
| 973 | /// leftmost-first and leftmost-longest. |
| 974 | /// |
| 975 | /// The `Standard` match kind is the default and is the only one that supports |
| 976 | /// overlapping matches and stream searching. (Trying to find overlapping or |
| 977 | /// streaming matches using leftmost match semantics will result in an error in |
| 978 | /// fallible APIs and a panic when using infallibe APIs.) The `Standard` match |
| 979 | /// kind will report matches as they are seen. When searching for overlapping |
| 980 | /// matches, then all possible matches are reported. When searching for |
| 981 | /// non-overlapping matches, the first match seen is reported. For example, for |
| 982 | /// non-overlapping matches, given the patterns `abcd` and `b` and the haystack |
| 983 | /// `abcdef`, only a match for `b` is reported since it is detected first. The |
| 984 | /// `abcd` match is never reported since it overlaps with the `b` match. |
| 985 | /// |
| 986 | /// In contrast, the leftmost match kind always prefers the leftmost match |
| 987 | /// among all possible matches. Given the same example as above with `abcd` and |
| 988 | /// `b` as patterns and `abcdef` as the haystack, the leftmost match is `abcd` |
| 989 | /// since it begins before the `b` match, even though the `b` match is detected |
| 990 | /// before the `abcd` match. In this case, the `b` match is not reported at all |
| 991 | /// since it overlaps with the `abcd` match. |
| 992 | /// |
| 993 | /// The difference between leftmost-first and leftmost-longest is in how they |
| 994 | /// resolve ambiguous matches when there are multiple leftmost matches to |
| 995 | /// choose from. Leftmost-first always chooses the pattern that was provided |
| 996 | /// earliest, where as leftmost-longest always chooses the longest matching |
| 997 | /// pattern. For example, given the patterns `a` and `ab` and the subject |
| 998 | /// string `ab`, the leftmost-first match is `a` but the leftmost-longest match |
| 999 | /// is `ab`. Conversely, if the patterns were given in reverse order, i.e., |
| 1000 | /// `ab` and `a`, then both the leftmost-first and leftmost-longest matches |
| 1001 | /// would be `ab`. Stated differently, the leftmost-first match depends on the |
| 1002 | /// order in which the patterns were given to the Aho-Corasick automaton. |
| 1003 | /// Because of that, when leftmost-first matching is used, if a pattern `A` |
| 1004 | /// that appears before a pattern `B` is a prefix of `B`, then it is impossible |
| 1005 | /// to ever observe a match of `B`. |
| 1006 | /// |
| 1007 | /// If you're not sure which match kind to pick, then stick with the standard |
| 1008 | /// kind, which is the default. In particular, if you need overlapping or |
| 1009 | /// streaming matches, then you _must_ use the standard kind. The leftmost |
| 1010 | /// kinds are useful in specific circumstances. For example, leftmost-first can |
| 1011 | /// be very useful as a way to implement match priority based on the order of |
| 1012 | /// patterns given and leftmost-longest can be useful for dictionary searching |
| 1013 | /// such that only the longest matching words are reported. |
| 1014 | /// |
| 1015 | /// # Relationship with regular expression alternations |
| 1016 | /// |
| 1017 | /// Understanding match semantics can be a little tricky, and one easy way |
| 1018 | /// to conceptualize non-overlapping matches from an Aho-Corasick automaton |
| 1019 | /// is to think about them as a simple alternation of literals in a regular |
| 1020 | /// expression. For example, let's say we wanted to match the strings |
| 1021 | /// `Sam` and `Samwise`, which would turn into the regex `Sam|Samwise`. It |
| 1022 | /// turns out that regular expression engines have two different ways of |
| 1023 | /// matching this alternation. The first way, leftmost-longest, is commonly |
| 1024 | /// found in POSIX compatible implementations of regular expressions (such as |
| 1025 | /// `grep`). The second way, leftmost-first, is commonly found in backtracking |
| 1026 | /// implementations such as Perl. (Some regex engines, such as RE2 and Rust's |
| 1027 | /// regex engine do not use backtracking, but still implement leftmost-first |
| 1028 | /// semantics in an effort to match the behavior of dominant backtracking |
| 1029 | /// regex engines such as those found in Perl, Ruby, Python, Javascript and |
| 1030 | /// PHP.) |
| 1031 | /// |
| 1032 | /// That is, when matching `Sam|Samwise` against `Samwise`, a POSIX regex |
| 1033 | /// will match `Samwise` because it is the longest possible match, but a |
| 1034 | /// Perl-like regex will match `Sam` since it appears earlier in the |
| 1035 | /// alternation. Indeed, the regex `Sam|Samwise` in a Perl-like regex engine |
| 1036 | /// will never match `Samwise` since `Sam` will always have higher priority. |
| 1037 | /// Conversely, matching the regex `Samwise|Sam` against `Samwise` will lead to |
| 1038 | /// a match of `Samwise` in both POSIX and Perl-like regexes since `Samwise` is |
| 1039 | /// still longest match, but it also appears earlier than `Sam`. |
| 1040 | /// |
| 1041 | /// The "standard" match semantics of Aho-Corasick generally don't correspond |
| 1042 | /// to the match semantics of any large group of regex implementations, so |
| 1043 | /// there's no direct analogy that can be made here. Standard match semantics |
| 1044 | /// are generally useful for overlapping matches, or if you just want to see |
| 1045 | /// matches as they are detected. |
| 1046 | /// |
| 1047 | /// The main conclusion to draw from this section is that the match semantics |
| 1048 | /// can be tweaked to precisely match either Perl-like regex alternations or |
| 1049 | /// POSIX regex alternations. |
| 1050 | #[non_exhaustive ] |
| 1051 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
| 1052 | pub enum MatchKind { |
| 1053 | /// Use standard match semantics, which support overlapping matches. When |
| 1054 | /// used with non-overlapping matches, matches are reported as they are |
| 1055 | /// seen. |
| 1056 | Standard, |
| 1057 | /// Use leftmost-first match semantics, which reports leftmost matches. |
| 1058 | /// When there are multiple possible leftmost matches, the match |
| 1059 | /// corresponding to the pattern that appeared earlier when constructing |
| 1060 | /// the automaton is reported. |
| 1061 | /// |
| 1062 | /// This does **not** support overlapping matches or stream searching. If |
| 1063 | /// this match kind is used, attempting to find overlapping matches or |
| 1064 | /// stream matches will fail. |
| 1065 | LeftmostFirst, |
| 1066 | /// Use leftmost-longest match semantics, which reports leftmost matches. |
| 1067 | /// When there are multiple possible leftmost matches, the longest match |
| 1068 | /// is chosen. |
| 1069 | /// |
| 1070 | /// This does **not** support overlapping matches or stream searching. If |
| 1071 | /// this match kind is used, attempting to find overlapping matches or |
| 1072 | /// stream matches will fail. |
| 1073 | LeftmostLongest, |
| 1074 | } |
| 1075 | |
| 1076 | /// The default match kind is `MatchKind::Standard`. |
| 1077 | impl Default for MatchKind { |
| 1078 | fn default() -> MatchKind { |
| 1079 | MatchKind::Standard |
| 1080 | } |
| 1081 | } |
| 1082 | |
| 1083 | impl MatchKind { |
| 1084 | #[inline ] |
| 1085 | pub(crate) fn is_standard(&self) -> bool { |
| 1086 | matches!(*self, MatchKind::Standard) |
| 1087 | } |
| 1088 | |
| 1089 | #[inline ] |
| 1090 | pub(crate) fn is_leftmost(&self) -> bool { |
| 1091 | matches!(*self, MatchKind::LeftmostFirst | MatchKind::LeftmostLongest) |
| 1092 | } |
| 1093 | |
| 1094 | #[inline ] |
| 1095 | pub(crate) fn is_leftmost_first(&self) -> bool { |
| 1096 | matches!(*self, MatchKind::LeftmostFirst) |
| 1097 | } |
| 1098 | |
| 1099 | /// Convert this match kind into a packed match kind. If this match kind |
| 1100 | /// corresponds to standard semantics, then this returns None, since |
| 1101 | /// packed searching does not support standard semantics. |
| 1102 | #[inline ] |
| 1103 | pub(crate) fn as_packed(&self) -> Option<crate::packed::MatchKind> { |
| 1104 | match *self { |
| 1105 | MatchKind::Standard => None, |
| 1106 | MatchKind::LeftmostFirst => { |
| 1107 | Some(crate::packed::MatchKind::LeftmostFirst) |
| 1108 | } |
| 1109 | MatchKind::LeftmostLongest => { |
| 1110 | Some(crate::packed::MatchKind::LeftmostLongest) |
| 1111 | } |
| 1112 | } |
| 1113 | } |
| 1114 | } |
| 1115 | |
| 1116 | /// The kind of anchored starting configurations to support in an Aho-Corasick |
| 1117 | /// searcher. |
| 1118 | /// |
| 1119 | /// Depending on which searcher is used internally by |
| 1120 | /// [`AhoCorasick`](crate::AhoCorasick), supporting both unanchored |
| 1121 | /// and anchored searches can be quite costly. For this reason, |
| 1122 | /// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind) |
| 1123 | /// can be used to configure whether your searcher supports unanchored, |
| 1124 | /// anchored or both kinds of searches. |
| 1125 | /// |
| 1126 | /// This searcher configuration knob works in concert with the search time |
| 1127 | /// configuration [`Input::anchored`]. Namely, if one requests an unsupported |
| 1128 | /// anchored mode, then the search will either panic or return an error, |
| 1129 | /// depending on whether you're using infallible or fallibe APIs, respectively. |
| 1130 | /// |
| 1131 | /// `AhoCorasick` by default only supports unanchored searches. |
| 1132 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
| 1133 | pub enum StartKind { |
| 1134 | /// Support both anchored and unanchored searches. |
| 1135 | Both, |
| 1136 | /// Support only unanchored searches. Requesting an anchored search will |
| 1137 | /// return an error in fallible APIs and panic in infallible APIs. |
| 1138 | Unanchored, |
| 1139 | /// Support only anchored searches. Requesting an unanchored search will |
| 1140 | /// return an error in fallible APIs and panic in infallible APIs. |
| 1141 | Anchored, |
| 1142 | } |
| 1143 | |
| 1144 | impl Default for StartKind { |
| 1145 | fn default() -> StartKind { |
| 1146 | StartKind::Unanchored |
| 1147 | } |
| 1148 | } |
| 1149 | |