1/*!
2Defines a prefilter for accelerating regex searches.
3
4A prefilter can be created by building a [`Prefilter`] value.
5
6A prefilter represents one of the most important optimizations available for
7accelerating regex searches. The idea of a prefilter is to very quickly find
8candidate locations in a haystack where a regex _could_ match. Once a candidate
9is found, it is then intended for the regex engine to run at that position to
10determine whether the candidate is a match or a false positive.
11
12In the aforementioned description of the prefilter optimization also lay its
13demise. Namely, if a prefilter has a high false positive rate and it produces
14lots of candidates, then a prefilter can overall make a regex search slower.
15It can run more slowly because more time is spent ping-ponging between the
16prefilter search and the regex engine attempting to confirm each candidate as
17a match. This ping-ponging has overhead that adds up, and is exacerbated by
18a high false positive rate.
19
20Nevertheless, the optimization is still generally worth performing in most
21cases. Particularly given just how much throughput can be improved. (It is not
22uncommon for prefilter optimizations to improve throughput by one or two orders
23of magnitude.)
24
25Typically a prefilter is used to find occurrences of literal prefixes from a
26regex pattern, but this isn't required. A prefilter can be used to look for
27suffixes or even inner literals.
28
29Note that as of now, prefilters throw away information about which pattern
30each literal comes from. In other words, when a prefilter finds a match,
31there's no way to know which pattern (or patterns) it came from. Therefore,
32in order to confirm a match, you'll have to check all of the patterns by
33running the full regex engine.
34*/
35
36mod aho_corasick;
37mod byteset;
38mod memchr;
39mod memmem;
40mod teddy;
41
42use core::{
43 borrow::Borrow,
44 fmt::Debug,
45 panic::{RefUnwindSafe, UnwindSafe},
46};
47
48#[cfg(feature = "alloc")]
49use alloc::sync::Arc;
50
51#[cfg(feature = "syntax")]
52use regex_syntax::hir::{literal, Hir};
53
54use crate::util::search::{MatchKind, Span};
55
56pub(crate) use crate::util::prefilter::{
57 aho_corasick::AhoCorasick,
58 byteset::ByteSet,
59 memchr::{Memchr, Memchr2, Memchr3},
60 memmem::Memmem,
61 teddy::Teddy,
62};
63
64/// A prefilter for accelerating regex searches.
65///
66/// If you already have your literals that you want to search with,
67/// then the vanilla [`Prefilter::new`] constructor is for you. But
68/// if you have an [`Hir`] value from the `regex-syntax` crate, then
69/// [`Prefilter::from_hir_prefix`] might be more convenient. Namely, it uses
70/// the [`regex-syntax::hir::literal`](regex_syntax::hir::literal) module to
71/// extract literal prefixes for you, optimize them and then select and build a
72/// prefilter matcher.
73///
74/// A prefilter must have **zero false negatives**. However, by its very
75/// nature, it may produce false positives. That is, a prefilter will never
76/// skip over a position in the haystack that corresponds to a match of the
77/// original regex pattern, but it *may* produce a match for a position
78/// in the haystack that does *not* correspond to a match of the original
79/// regex pattern. If you use either the [`Prefilter::from_hir_prefix`] or
80/// [`Prefilter::from_hirs_prefix`] constructors, then this guarantee is
81/// upheld for you automatically. This guarantee is not preserved if you use
82/// [`Prefilter::new`] though, since it is up to the caller to provide correct
83/// literal strings with respect to the original regex pattern.
84///
85/// # Cloning
86///
87/// It is an API guarantee that cloning a prefilter is cheap. That is, cloning
88/// it will not duplicate whatever heap memory is used to represent the
89/// underlying matcher.
90///
91/// # Example
92///
93/// This example shows how to attach a `Prefilter` to the
94/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) in order to accelerate
95/// searches.
96///
97/// ```
98/// use regex_automata::{
99/// nfa::thompson::pikevm::PikeVM,
100/// util::prefilter::Prefilter,
101/// Match, MatchKind,
102/// };
103///
104/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Bruce "])
105/// .expect("a prefilter");
106/// let re = PikeVM::builder()
107/// .configure(PikeVM::config().prefilter(Some(pre)))
108/// .build(r"Bruce \w+")?;
109/// let mut cache = re.create_cache();
110/// assert_eq!(
111/// Some(Match::must(0, 6..23)),
112/// re.find(&mut cache, "Hello Bruce Springsteen!"),
113/// );
114/// # Ok::<(), Box<dyn std::error::Error>>(())
115/// ```
116///
117/// But note that if you get your prefilter incorrect, it could lead to an
118/// incorrect result!
119///
120/// ```
121/// use regex_automata::{
122/// nfa::thompson::pikevm::PikeVM,
123/// util::prefilter::Prefilter,
124/// Match, MatchKind,
125/// };
126///
127/// // This prefilter is wrong!
128/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Patti "])
129/// .expect("a prefilter");
130/// let re = PikeVM::builder()
131/// .configure(PikeVM::config().prefilter(Some(pre)))
132/// .build(r"Bruce \w+")?;
133/// let mut cache = re.create_cache();
134/// // We find no match even though the regex does match.
135/// assert_eq!(
136/// None,
137/// re.find(&mut cache, "Hello Bruce Springsteen!"),
138/// );
139/// # Ok::<(), Box<dyn std::error::Error>>(())
140/// ```
141#[derive(Clone, Debug)]
142pub struct Prefilter {
143 #[cfg(not(feature = "alloc"))]
144 _unused: (),
145 #[cfg(feature = "alloc")]
146 pre: Arc<dyn PrefilterI>,
147 #[cfg(feature = "alloc")]
148 is_fast: bool,
149}
150
151impl Prefilter {
152 /// Create a new prefilter from a sequence of needles and a corresponding
153 /// match semantics.
154 ///
155 /// This may return `None` for a variety of reasons, for example, if
156 /// a suitable prefilter could not be constructed. That might occur
157 /// if they are unavailable (e.g., the `perf-literal-substring` and
158 /// `perf-literal-multisubstring` features aren't enabled), or it might
159 /// occur because of heuristics or other artifacts of how the prefilter
160 /// works.
161 ///
162 /// Note that if you have an [`Hir`] expression, it may be more convenient
163 /// to use [`Prefilter::from_hir_prefix`]. It will automatically handle the
164 /// task of extracting prefix literals for you.
165 ///
166 /// # Example
167 ///
168 /// This example shows how match semantics can impact the matching
169 /// algorithm used by the prefilter. For this reason, it is important to
170 /// ensure that the match semantics given here are consistent with the
171 /// match semantics intended for the regular expression that the literals
172 /// were extracted from.
173 ///
174 /// ```
175 /// use regex_automata::{
176 /// util::{prefilter::Prefilter, syntax},
177 /// MatchKind, Span,
178 /// };
179 ///
180 /// let hay = "Hello samwise";
181 ///
182 /// // With leftmost-first, we find 'samwise' here because it comes
183 /// // before 'sam' in the sequence we give it..
184 /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["samwise", "sam"])
185 /// .expect("a prefilter");
186 /// assert_eq!(
187 /// Some(Span::from(6..13)),
188 /// pre.find(hay.as_bytes(), Span::from(0..hay.len())),
189 /// );
190 /// // Still with leftmost-first but with the literals reverse, now 'sam'
191 /// // will match instead!
192 /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["sam", "samwise"])
193 /// .expect("a prefilter");
194 /// assert_eq!(
195 /// Some(Span::from(6..9)),
196 /// pre.find(hay.as_bytes(), Span::from(0..hay.len())),
197 /// );
198 ///
199 /// # Ok::<(), Box<dyn std::error::Error>>(())
200 /// ```
201 pub fn new<B: AsRef<[u8]>>(
202 kind: MatchKind,
203 needles: &[B],
204 ) -> Option<Prefilter> {
205 Choice::new(kind, needles).and_then(Prefilter::from_choice)
206 }
207
208 /// This turns a prefilter selection into a `Prefilter`. That is, in turns
209 /// the enum given into a trait object.
210 fn from_choice(choice: Choice) -> Option<Prefilter> {
211 #[cfg(not(feature = "alloc"))]
212 {
213 None
214 }
215 #[cfg(feature = "alloc")]
216 {
217 let pre: Arc<dyn PrefilterI> = match choice {
218 Choice::Memchr(p) => Arc::new(p),
219 Choice::Memchr2(p) => Arc::new(p),
220 Choice::Memchr3(p) => Arc::new(p),
221 Choice::Memmem(p) => Arc::new(p),
222 Choice::Teddy(p) => Arc::new(p),
223 Choice::ByteSet(p) => Arc::new(p),
224 Choice::AhoCorasick(p) => Arc::new(p),
225 };
226 let is_fast = pre.is_fast();
227 Some(Prefilter { pre, is_fast })
228 }
229 }
230
231 /// This attempts to extract prefixes from the given `Hir` expression for
232 /// the given match semantics, and if possible, builds a prefilter for
233 /// them.
234 ///
235 /// # Example
236 ///
237 /// This example shows how to build a prefilter directly from an [`Hir`]
238 /// expression, and use to find an occurrence of a prefix from the regex
239 /// pattern.
240 ///
241 /// ```
242 /// use regex_automata::{
243 /// util::{prefilter::Prefilter, syntax},
244 /// MatchKind, Span,
245 /// };
246 ///
247 /// let hir = syntax::parse(r"(Bruce|Patti) \w+")?;
248 /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
249 /// .expect("a prefilter");
250 /// let hay = "Hello Patti Scialfa!";
251 /// assert_eq!(
252 /// Some(Span::from(6..12)),
253 /// pre.find(hay.as_bytes(), Span::from(0..hay.len())),
254 /// );
255 ///
256 /// # Ok::<(), Box<dyn std::error::Error>>(())
257 /// ```
258 #[cfg(feature = "syntax")]
259 pub fn from_hir_prefix(kind: MatchKind, hir: &Hir) -> Option<Prefilter> {
260 Prefilter::from_hirs_prefix(kind, &[hir])
261 }
262
263 /// This attempts to extract prefixes from the given `Hir` expressions for
264 /// the given match semantics, and if possible, builds a prefilter for
265 /// them.
266 ///
267 /// Note that as of now, prefilters throw away information about which
268 /// pattern each literal comes from. In other words, when a prefilter finds
269 /// a match, there's no way to know which pattern (or patterns) it came
270 /// from. Therefore, in order to confirm a match, you'll have to check all
271 /// of the patterns by running the full regex engine.
272 ///
273 /// # Example
274 ///
275 /// This example shows how to build a prefilter directly from multiple
276 /// `Hir` expressions expression, and use it to find an occurrence of a
277 /// prefix from the regex patterns.
278 ///
279 /// ```
280 /// use regex_automata::{
281 /// util::{prefilter::Prefilter, syntax},
282 /// MatchKind, Span,
283 /// };
284 ///
285 /// let hirs = syntax::parse_many(&[
286 /// r"(Bruce|Patti) \w+",
287 /// r"Mrs?\. Doubtfire",
288 /// ])?;
289 /// let pre = Prefilter::from_hirs_prefix(MatchKind::LeftmostFirst, &hirs)
290 /// .expect("a prefilter");
291 /// let hay = "Hello Mrs. Doubtfire";
292 /// assert_eq!(
293 /// Some(Span::from(6..20)),
294 /// pre.find(hay.as_bytes(), Span::from(0..hay.len())),
295 /// );
296 ///
297 /// # Ok::<(), Box<dyn std::error::Error>>(())
298 /// ```
299 #[cfg(feature = "syntax")]
300 pub fn from_hirs_prefix<H: Borrow<Hir>>(
301 kind: MatchKind,
302 hirs: &[H],
303 ) -> Option<Prefilter> {
304 prefixes(kind, hirs)
305 .literals()
306 .and_then(|lits| Prefilter::new(kind, lits))
307 }
308
309 /// Run this prefilter on `haystack[span.start..end]` and return a matching
310 /// span if one exists.
311 ///
312 /// The span returned is guaranteed to have a start position greater than
313 /// or equal to the one given, and an end position less than or equal to
314 /// the one given.
315 ///
316 /// # Example
317 ///
318 /// This example shows how to build a prefilter directly from an [`Hir`]
319 /// expression, and use it to find an occurrence of a prefix from the regex
320 /// pattern.
321 ///
322 /// ```
323 /// use regex_automata::{
324 /// util::{prefilter::Prefilter, syntax},
325 /// MatchKind, Span,
326 /// };
327 ///
328 /// let hir = syntax::parse(r"Bruce \w+")?;
329 /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
330 /// .expect("a prefilter");
331 /// let hay = "Hello Bruce Springsteen!";
332 /// assert_eq!(
333 /// Some(Span::from(6..12)),
334 /// pre.find(hay.as_bytes(), Span::from(0..hay.len())),
335 /// );
336 ///
337 /// # Ok::<(), Box<dyn std::error::Error>>(())
338 /// ```
339 #[inline]
340 pub fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
341 #[cfg(not(feature = "alloc"))]
342 {
343 unreachable!()
344 }
345 #[cfg(feature = "alloc")]
346 {
347 self.pre.find(haystack, span)
348 }
349 }
350
351 /// Returns the span of a prefix of `haystack[span.start..span.end]` if
352 /// the prefilter matches.
353 ///
354 /// The span returned is guaranteed to have a start position equivalent to
355 /// the one given, and an end position less than or equal to the one given.
356 ///
357 /// # Example
358 ///
359 /// This example shows how to build a prefilter directly from an [`Hir`]
360 /// expression, and use it to find an occurrence of a prefix from the regex
361 /// pattern that begins at the start of a haystack only.
362 ///
363 /// ```
364 /// use regex_automata::{
365 /// util::{prefilter::Prefilter, syntax},
366 /// MatchKind, Span,
367 /// };
368 ///
369 /// let hir = syntax::parse(r"Bruce \w+")?;
370 /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
371 /// .expect("a prefilter");
372 /// let hay = "Hello Bruce Springsteen!";
373 /// // Nothing is found here because 'Bruce' does
374 /// // not occur at the beginning of our search.
375 /// assert_eq!(
376 /// None,
377 /// pre.prefix(hay.as_bytes(), Span::from(0..hay.len())),
378 /// );
379 /// // But if we change where we start the search
380 /// // to begin where 'Bruce ' begins, then a
381 /// // match will be found.
382 /// assert_eq!(
383 /// Some(Span::from(6..12)),
384 /// pre.prefix(hay.as_bytes(), Span::from(6..hay.len())),
385 /// );
386 ///
387 /// # Ok::<(), Box<dyn std::error::Error>>(())
388 /// ```
389 #[inline]
390 pub fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
391 #[cfg(not(feature = "alloc"))]
392 {
393 unreachable!()
394 }
395 #[cfg(feature = "alloc")]
396 {
397 self.pre.prefix(haystack, span)
398 }
399 }
400
401 /// Returns the heap memory, in bytes, used by the underlying prefilter.
402 #[inline]
403 pub fn memory_usage(&self) -> usize {
404 #[cfg(not(feature = "alloc"))]
405 {
406 unreachable!()
407 }
408 #[cfg(feature = "alloc")]
409 {
410 self.pre.memory_usage()
411 }
412 }
413
414 /// Implementations might return true here if they believe themselves to
415 /// be "fast." The concept of "fast" is deliberately left vague, but in
416 /// practice this usually corresponds to whether it's believed that SIMD
417 /// will be used.
418 ///
419 /// Why do we care about this? Well, some prefilter tricks tend to come
420 /// with their own bits of overhead, and so might only make sense if we
421 /// know that a scan will be *much* faster than the regex engine itself.
422 /// Otherwise, the trick may not be worth doing. Whether something is
423 /// "much" faster than the regex engine generally boils down to whether
424 /// SIMD is used. (But not always. Even a SIMD matcher with a high false
425 /// positive rate can become quite slow.)
426 ///
427 /// Even if this returns true, it is still possible for the prefilter to
428 /// be "slow." Remember, prefilters are just heuristics. We can't really
429 /// *know* a prefilter will be fast without actually trying the prefilter.
430 /// (Which of course we cannot afford to do.)
431 #[inline]
432 pub(crate) fn is_fast(&self) -> bool {
433 #[cfg(not(feature = "alloc"))]
434 {
435 unreachable!()
436 }
437 #[cfg(feature = "alloc")]
438 {
439 self.is_fast
440 }
441 }
442}
443
444/// A trait for abstracting over prefilters. Basically, a prefilter is
445/// something that do an unanchored *and* an anchored search in a haystack
446/// within a given span.
447///
448/// This exists pretty much only so that we can use prefilters as a trait
449/// object (which is what `Prefilter` is). If we ever move off of trait objects
450/// and to an enum, then it's likely this trait could be removed.
451pub(crate) trait PrefilterI:
452 Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
453{
454 /// Run this prefilter on `haystack[span.start..end]` and return a matching
455 /// span if one exists.
456 ///
457 /// The span returned is guaranteed to have a start position greater than
458 /// or equal to the one given, and an end position less than or equal to
459 /// the one given.
460 fn find(&self, haystack: &[u8], span: Span) -> Option<Span>;
461
462 /// Returns the span of a prefix of `haystack[span.start..span.end]` if
463 /// the prefilter matches.
464 ///
465 /// The span returned is guaranteed to have a start position equivalent to
466 /// the one given, and an end position less than or equal to the one given.
467 fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span>;
468
469 /// Returns the heap memory, in bytes, used by the underlying prefilter.
470 fn memory_usage(&self) -> usize;
471
472 /// Implementations might return true here if they believe themselves to
473 /// be "fast." See [`Prefilter::is_fast`] for more details.
474 fn is_fast(&self) -> bool;
475}
476
477#[cfg(feature = "alloc")]
478impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P> {
479 #[cfg_attr(feature = "perf-inline", inline(always))]
480 fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
481 (&**self).find(haystack, span)
482 }
483
484 #[cfg_attr(feature = "perf-inline", inline(always))]
485 fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
486 (&**self).prefix(haystack, span)
487 }
488
489 #[cfg_attr(feature = "perf-inline", inline(always))]
490 fn memory_usage(&self) -> usize {
491 (&**self).memory_usage()
492 }
493
494 #[cfg_attr(feature = "perf-inline", inline(always))]
495 fn is_fast(&self) -> bool {
496 (&**self).is_fast()
497 }
498}
499
500/// A type that encapsulates the selection of a prefilter algorithm from a
501/// sequence of needles.
502///
503/// The existence of this type is a little tricky, because we don't (currently)
504/// use it for performing a search. Instead, we really only consume it by
505/// converting the underlying prefilter into a trait object, whether that be
506/// `dyn PrefilterI` or `dyn Strategy` (for the meta regex engine). In order
507/// to avoid re-copying the prefilter selection logic, we isolate it here, and
508/// then force anything downstream that wants to convert it to a trait object
509/// to do trivial case analysis on it.
510///
511/// One wonders whether we *should* use an enum instead of a trait object.
512/// At time of writing, I chose trait objects based on instinct because 1) I
513/// knew I wasn't going to inline anything and 2) there would potentially be
514/// many different choices. However, as of time of writing, I haven't actually
515/// compared the trait object approach to the enum approach. That probably
516/// should be litigated, but I ran out of steam.
517///
518/// Note that if the `alloc` feature is disabled, then values of this type
519/// are (and should) never be constructed. Also, in practice, for any of the
520/// prefilters to be selected, you'll need at least one of the `perf-literal-*`
521/// features enabled.
522#[derive(Clone, Debug)]
523pub(crate) enum Choice {
524 Memchr(Memchr),
525 Memchr2(Memchr2),
526 Memchr3(Memchr3),
527 Memmem(Memmem),
528 Teddy(Teddy),
529 ByteSet(ByteSet),
530 AhoCorasick(AhoCorasick),
531}
532
533impl Choice {
534 /// Select what is believed to be the best prefilter algorithm for the
535 /// match semantics and sequence of needles given.
536 ///
537 /// This selection algorithm uses the needles as given without any
538 /// modification. For example, if `[bar]` is given, then this doesn't
539 /// try to select `memchr` for `b`. Instead, it would select `memmem`
540 /// for `bar`. If callers would want `memchr` selected for `[bar]`, then
541 /// callers should massages the literals themselves. That is, callers are
542 /// responsible for heuristics surrounding which sequence of literals is
543 /// best.
544 ///
545 /// What this selection algorithm does is attempt to use the fastest
546 /// prefilter that works for the literals given. So if `[a, b]`, is given,
547 /// then `memchr2` is selected.
548 ///
549 /// Of course, which prefilter is selected is also subject to what
550 /// is available. For example, if `alloc` isn't enabled, then
551 /// that limits which prefilters can be selected. Similarly, if
552 /// `perf-literal-substring` isn't enabled, then nothing from the `memchr`
553 /// crate can be returned.
554 pub(crate) fn new<B: AsRef<[u8]>>(
555 kind: MatchKind,
556 needles: &[B],
557 ) -> Option<Choice> {
558 // An empty set means the regex matches nothing, so no sense in
559 // building a prefilter.
560 if needles.len() == 0 {
561 debug!("prefilter building failed: found empty set of literals");
562 return None;
563 }
564 // If the regex can match the empty string, then the prefilter
565 // will by definition match at every position. This is obviously
566 // completely ineffective.
567 if needles.iter().any(|n| n.as_ref().is_empty()) {
568 debug!("prefilter building failed: literals match empty string");
569 return None;
570 }
571 // BREADCRUMBS: Perhaps the literal optimizer should special case
572 // sequences of length two or three if the leading bytes of each are
573 // "rare"? Or perhaps, if there are two or three total possible leading
574 // bytes, regardless of the number of literals, and all are rare...
575 // Then well, perhaps we should use memchr2 or memchr3 in those cases?
576 if let Some(pre) = Memchr::new(kind, needles) {
577 debug!("prefilter built: memchr");
578 return Some(Choice::Memchr(pre));
579 }
580 if let Some(pre) = Memchr2::new(kind, needles) {
581 debug!("prefilter built: memchr2");
582 return Some(Choice::Memchr2(pre));
583 }
584 if let Some(pre) = Memchr3::new(kind, needles) {
585 debug!("prefilter built: memchr3");
586 return Some(Choice::Memchr3(pre));
587 }
588 if let Some(pre) = Memmem::new(kind, needles) {
589 debug!("prefilter built: memmem");
590 return Some(Choice::Memmem(pre));
591 }
592 if let Some(pre) = Teddy::new(kind, needles) {
593 debug!("prefilter built: teddy");
594 return Some(Choice::Teddy(pre));
595 }
596 if let Some(pre) = ByteSet::new(kind, needles) {
597 debug!("prefilter built: byteset");
598 return Some(Choice::ByteSet(pre));
599 }
600 if let Some(pre) = AhoCorasick::new(kind, needles) {
601 debug!("prefilter built: aho-corasick");
602 return Some(Choice::AhoCorasick(pre));
603 }
604 debug!("prefilter building failed: no strategy could be found");
605 None
606 }
607}
608
609/// Extracts all of the prefix literals from the given HIR expressions into a
610/// single `Seq`. The literals in the sequence are ordered with respect to the
611/// order of the given HIR expressions and consistent with the match semantics
612/// given.
613///
614/// The sequence returned is "optimized." That is, they may be shrunk or even
615/// truncated according to heuristics with the intent of making them more
616/// useful as a prefilter. (Which translates to both using faster algorithms
617/// and minimizing the false positive rate.)
618///
619/// Note that this erases any connection between the literals and which pattern
620/// (or patterns) they came from.
621///
622/// The match kind given must correspond to the match semantics of the regex
623/// that is represented by the HIRs given. The match semantics may change the
624/// literal sequence returned.
625#[cfg(feature = "syntax")]
626pub(crate) fn prefixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq
627where
628 H: core::borrow::Borrow<Hir>,
629{
630 let mut extractor = literal::Extractor::new();
631 extractor.kind(literal::ExtractKind::Prefix);
632
633 let mut prefixes = literal::Seq::empty();
634 for hir in hirs {
635 prefixes.union(&mut extractor.extract(hir.borrow()));
636 }
637 debug!(
638 "prefixes (len={:?}, exact={:?}) extracted before optimization: {:?}",
639 prefixes.len(),
640 prefixes.is_exact(),
641 prefixes
642 );
643 match kind {
644 MatchKind::All => {
645 prefixes.sort();
646 prefixes.dedup();
647 }
648 MatchKind::LeftmostFirst => {
649 prefixes.optimize_for_prefix_by_preference();
650 }
651 }
652 debug!(
653 "prefixes (len={:?}, exact={:?}) extracted after optimization: {:?}",
654 prefixes.len(),
655 prefixes.is_exact(),
656 prefixes
657 );
658 prefixes
659}
660
661/// Like `prefixes`, but for all suffixes of all matches for the given HIRs.
662#[cfg(feature = "syntax")]
663pub(crate) fn suffixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq
664where
665 H: core::borrow::Borrow<Hir>,
666{
667 let mut extractor = literal::Extractor::new();
668 extractor.kind(literal::ExtractKind::Suffix);
669
670 let mut suffixes = literal::Seq::empty();
671 for hir in hirs {
672 suffixes.union(&mut extractor.extract(hir.borrow()));
673 }
674 debug!(
675 "suffixes (len={:?}, exact={:?}) extracted before optimization: {:?}",
676 suffixes.len(),
677 suffixes.is_exact(),
678 suffixes
679 );
680 match kind {
681 MatchKind::All => {
682 suffixes.sort();
683 suffixes.dedup();
684 }
685 MatchKind::LeftmostFirst => {
686 suffixes.optimize_for_suffix_by_preference();
687 }
688 }
689 debug!(
690 "suffixes (len={:?}, exact={:?}) extracted after optimization: {:?}",
691 suffixes.len(),
692 suffixes.is_exact(),
693 suffixes
694 );
695 suffixes
696}
697