mod.rs - Codebrowser

1	/!*
2	Defines a prefilter for accelerating regex searches.
3
4	A prefilter can be created by building a [`Prefilter`] value.
5
6	A prefilter represents one of the most important optimizations available for
7	accelerating regex searches. The idea of a prefilter is to very quickly find
8	candidate locations in a haystack where a regex _could_ match. Once a candidate
9	is found, it is then intended for the regex engine to run at that position to
10	determine whether the candidate is a match or a false positive.
11
12	In the aforementioned description of the prefilter optimization also lay its
13	demise. Namely, if a prefilter has a high false positive rate and it produces
14	lots of candidates, then a prefilter can overall make a regex search slower.
15	It can run more slowly because more time is spent ping-ponging between the
16	prefilter search and the regex engine attempting to confirm each candidate as
17	a match. This ping-ponging has overhead that adds up, and is exacerbated by
18	a high false positive rate.
19
20	Nevertheless, the optimization is still generally worth performing in most
21	cases. Particularly given just how much throughput can be improved. (It is not
22	uncommon for prefilter optimizations to improve throughput by one or two orders
23	of magnitude.)
24
25	Typically a prefilter is used to find occurrences of literal prefixes from a
26	regex pattern, but this isn't required. A prefilter can be used to look for
27	suffixes or even inner literals.
28
29	Note that as of now, prefilters throw away information about which pattern
30	each literal comes from. In other words, when a prefilter finds a match,
31	there's no way to know which pattern (or patterns) it came from. Therefore,
32	in order to confirm a match, you'll have to check all of the patterns by
33	running the full regex engine.
34	*/
35
36	mod aho_corasick;
37	mod byteset;
38	mod memchr;
39	mod memmem;
40	mod teddy;
41
42	use core::{
43	borrow::Borrow,
44	fmt::Debug,
45	panic::{RefUnwindSafe, UnwindSafe},
46	};
47
48	#[cfg(feature = "alloc")]
49	use alloc::sync::Arc;
50
51	#[cfg(feature = "syntax")]
52	use regex_syntax::hir::{literal, Hir};
53
54	use crate::util::search::{MatchKind, Span};
55
56	pub(crate) use crate::util::prefilter::{
57	aho_corasick::AhoCorasick,
58	byteset::ByteSet,
59	memchr::{Memchr, Memchr2, Memchr3},
60	memmem::Memmem,
61	teddy::Teddy,
62	};
63
64	/// A prefilter for accelerating regex searches.
65	///
66	/// If you already have your literals that you want to search with,
67	/// then the vanilla [`Prefilter::new`] constructor is for you. But
68	/// if you have an [`Hir`] value from the `regex-syntax` crate, then
69	/// [`Prefilter::from_hir_prefix`] might be more convenient. Namely, it uses
70	/// the [`regex-syntax::hir::literal`](regex_syntax::hir::literal) module to
71	/// extract literal prefixes for you, optimize them and then select and build a
72	/// prefilter matcher.
73	///
74	/// A prefilter must have zero false negatives. However, by its very
75	/// nature, it may produce false positives. That is, a prefilter will never
76	/// skip over a position in the haystack that corresponds to a match of the
77	/// original regex pattern, but it may* produce a match for a position*
78	/// in the haystack that does not* correspond to a match of the original*
79	/// regex pattern. If you use either the [`Prefilter::from_hir_prefix`] or
80	/// [`Prefilter::from_hirs_prefix`] constructors, then this guarantee is
81	/// upheld for you automatically. This guarantee is not preserved if you use
82	/// [`Prefilter::new`] though, since it is up to the caller to provide correct
83	/// literal strings with respect to the original regex pattern.
84	///
85	/// # Cloning
86	///
87	/// It is an API guarantee that cloning a prefilter is cheap. That is, cloning
88	/// it will not duplicate whatever heap memory is used to represent the
89	/// underlying matcher.
90	///
91	/// # Example
92	///
93	/// This example shows how to attach a `Prefilter` to the
94	/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) in order to accelerate
95	/// searches.
96	///
97	/// ```
98	/// use regex_automata::{
99	/// nfa::thompson::pikevm::PikeVM,
100	/// util::prefilter::Prefilter,
101	/// Match, MatchKind,
102	/// };
103	///
104	/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Bruce "])
105	/// .expect("a prefilter");
106	/// let re = PikeVM::builder()
107	/// .configure(PikeVM::config().prefilter(Some(pre)))
108	/// .build(r"Bruce \w+")?;
109	/// let mut cache = re.create_cache();
110	/// assert_eq!(
111	/// Some(Match::must(`0`, `6`..`23`)),
112	/// re.find(&mut cache, "Hello Bruce Springsteen!"),
113	/// );
114	/// # Ok::<(), Box<dyn std::error::Error>>(())
115	/// ```
116	///
117	/// But note that if you get your prefilter incorrect, it could lead to an
118	/// incorrect result!
119	///
120	/// ```
121	/// use regex_automata::{
122	/// nfa::thompson::pikevm::PikeVM,
123	/// util::prefilter::Prefilter,
124	/// Match, MatchKind,
125	/// };
126	///
127	/// // This prefilter is wrong!
128	/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Patti "])
129	/// .expect("a prefilter");
130	/// let re = PikeVM::builder()
131	/// .configure(PikeVM::config().prefilter(Some(pre)))
132	/// .build(r"Bruce \w+")?;
133	/// let mut cache = re.create_cache();
134	/// // We find no match even though the regex does match.
135	/// assert_eq!(
136	/// None,
137	/// re.find(&mut cache, "Hello Bruce Springsteen!"),
138	/// );
139	/// # Ok::<(), Box<dyn std::error::Error>>(())
140	/// ```
141	#[derive(Clone, Debug)]
142	pub struct Prefilter {
143	#[cfg(not(feature = "alloc"))]
144	_unused: (),
145	#[cfg(feature = "alloc")]
146	pre: Arc<dyn PrefilterI>,
147	#[cfg(feature = "alloc")]
148	is_fast: bool,
149	}
150
151	impl Prefilter {
152	/// Create a new prefilter from a sequence of needles and a corresponding
153	/// match semantics.
154	///
155	/// This may return `None` for a variety of reasons, for example, if
156	/// a suitable prefilter could not be constructed. That might occur
157	/// if they are unavailable (e.g., the `perf-literal-substring` and
158	/// `perf-literal-multisubstring` features aren't enabled), or it might
159	/// occur because of heuristics or other artifacts of how the prefilter
160	/// works.
161	///
162	/// Note that if you have an [`Hir`] expression, it may be more convenient
163	/// to use [`Prefilter::from_hir_prefix`]. It will automatically handle the
164	/// task of extracting prefix literals for you.
165	///
166	/// # Example
167	///
168	/// This example shows how match semantics can impact the matching
169	/// algorithm used by the prefilter. For this reason, it is important to
170	/// ensure that the match semantics given here are consistent with the
171	/// match semantics intended for the regular expression that the literals
172	/// were extracted from.
173	///
174	/// ```
175	/// use regex_automata::{
176	/// util::{prefilter::Prefilter, syntax},
177	/// MatchKind, Span,
178	/// };
179	///
180	/// let hay = "Hello samwise";
181	///
182	/// // With leftmost-first, we find 'samwise' here because it comes
183	/// // before 'sam' in the sequence we give it..
184	/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["samwise", "sam"])
185	/// .expect("a prefilter");
186	/// assert_eq!(
187	/// Some(Span::from(`6`..`13`)),
188	/// pre.find(hay.as_bytes(), Span::from(`0`..hay.len())),
189	/// );
190	/// // Still with leftmost-first but with the literals reverse, now 'sam'
191	/// // will match instead!
192	/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["sam", "samwise"])
193	/// .expect("a prefilter");
194	/// assert_eq!(
195	/// Some(Span::from(`6`..`9`)),
196	/// pre.find(hay.as_bytes(), Span::from(`0`..hay.len())),
197	/// );
198	///
199	/// # Ok::<(), Box<dyn std::error::Error>>(())
200	/// ```
201	pub fn new<B: AsRef<[u8]>>(
202	kind: MatchKind,
203	needles: &[B],
204	) -> Option<Prefilter> {
205	Choice::new(kind, needles).and_then(Prefilter::from_choice)
206	}
207
208	/// This turns a prefilter selection into a `Prefilter`. That is, in turns
209	/// the enum given into a trait object.
210	fn from_choice(choice: Choice) -> Option<Prefilter> {
211	#[cfg(not(feature = "alloc"))]
212	{
213	None
214	}
215	#[cfg(feature = "alloc")]
216	{
217	let pre: Arc<dyn PrefilterI> = match choice {
218	Choice::Memchr(p) => Arc::new(p),
219	Choice::Memchr2(p) => Arc::new(p),
220	Choice::Memchr3(p) => Arc::new(p),
221	Choice::Memmem(p) => Arc::new(p),
222	Choice::Teddy(p) => Arc::new(p),
223	Choice::ByteSet(p) => Arc::new(p),
224	Choice::AhoCorasick(p) => Arc::new(p),
225	};
226	let is_fast = pre.is_fast();
227	Some(Prefilter { pre, is_fast })
228	}
229	}
230
231	/// This attempts to extract prefixes from the given `Hir` expression for
232	/// the given match semantics, and if possible, builds a prefilter for
233	/// them.
234	///
235	/// # Example
236	///
237	/// This example shows how to build a prefilter directly from an [`Hir`]
238	/// expression, and use to find an occurrence of a prefix from the regex
239	/// pattern.
240	///
241	/// ```
242	/// use regex_automata::{
243	/// util::{prefilter::Prefilter, syntax},
244	/// MatchKind, Span,
245	/// };
246	///
247	/// let hir = syntax::parse(r"(Bruce\|Patti) \w+")?;
248	/// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
249	/// .expect("a prefilter");
250	/// let hay = "Hello Patti Scialfa!";
251	/// assert_eq!(
252	/// Some(Span::from(`6`..`12`)),
253	/// pre.find(hay.as_bytes(), Span::from(`0`..hay.len())),
254	/// );
255	///
256	/// # Ok::<(), Box<dyn std::error::Error>>(())
257	/// ```
258	#[cfg(feature = "syntax")]
259	pub fn from_hir_prefix(kind: MatchKind, hir: &Hir) -> Option<Prefilter> {
260	Prefilter::from_hirs_prefix(kind, &[hir])
261	}
262
263	/// This attempts to extract prefixes from the given `Hir` expressions for
264	/// the given match semantics, and if possible, builds a prefilter for
265	/// them.
266	///
267	/// Note that as of now, prefilters throw away information about which
268	/// pattern each literal comes from. In other words, when a prefilter finds
269	/// a match, there's no way to know which pattern (or patterns) it came
270	/// from. Therefore, in order to confirm a match, you'll have to check all
271	/// of the patterns by running the full regex engine.
272	///
273	/// # Example
274	///
275	/// This example shows how to build a prefilter directly from multiple
276	/// `Hir` expressions expression, and use it to find an occurrence of a
277	/// prefix from the regex patterns.
278	///
279	/// ```
280	/// use regex_automata::{
281	/// util::{prefilter::Prefilter, syntax},
282	/// MatchKind, Span,
283	/// };
284	///
285	/// let hirs = syntax::parse_many(&[
286	/// r"(Bruce\|Patti) \w+",
287	/// r"Mrs?\. Doubtfire",
288	/// ])?;
289	/// let pre = Prefilter::from_hirs_prefix(MatchKind::LeftmostFirst, &hirs)
290	/// .expect("a prefilter");
291	/// let hay = "Hello Mrs. Doubtfire";
292	/// assert_eq!(
293	/// Some(Span::from(`6`..`20`)),
294	/// pre.find(hay.as_bytes(), Span::from(`0`..hay.len())),
295	/// );
296	///
297	/// # Ok::<(), Box<dyn std::error::Error>>(())
298	/// ```
299	#[cfg(feature = "syntax")]
300	pub fn from_hirs_prefix<H: Borrow<Hir>>(
301	kind: MatchKind,
302	hirs: &[H],
303	) -> Option<Prefilter> {
304	prefixes(kind, hirs)
305	.literals()
306	.and_then(\|lits\| Prefilter::new(kind, lits))
307	}
308
309	/// Run this prefilter on `haystack[span.start..end]` and return a matching
310	/// span if one exists.
311	///
312	/// The span returned is guaranteed to have a start position greater than
313	/// or equal to the one given, and an end position less than or equal to
314	/// the one given.
315	///
316	/// # Example
317	///
318	/// This example shows how to build a prefilter directly from an [`Hir`]
319	/// expression, and use it to find an occurrence of a prefix from the regex
320	/// pattern.
321	///
322	/// ```
323	/// use regex_automata::{
324	/// util::{prefilter::Prefilter, syntax},
325	/// MatchKind, Span,
326	/// };
327	///
328	/// let hir = syntax::parse(r"Bruce \w+")?;
329	/// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
330	/// .expect("a prefilter");
331	/// let hay = "Hello Bruce Springsteen!";
332	/// assert_eq!(
333	/// Some(Span::from(`6`..`12`)),
334	/// pre.find(hay.as_bytes(), Span::from(`0`..hay.len())),
335	/// );
336	///
337	/// # Ok::<(), Box<dyn std::error::Error>>(())
338	/// ```
339	#[inline]
340	pub fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
341	#[cfg(not(feature = "alloc"))]
342	{
343	unreachable!()
344	}
345	#[cfg(feature = "alloc")]
346	{
347	self.pre.find(haystack, span)
348	}
349	}
350
351	/// Returns the span of a prefix of `haystack[span.start..span.end]` if
352	/// the prefilter matches.
353	///
354	/// The span returned is guaranteed to have a start position equivalent to
355	/// the one given, and an end position less than or equal to the one given.
356	///
357	/// # Example
358	///
359	/// This example shows how to build a prefilter directly from an [`Hir`]
360	/// expression, and use it to find an occurrence of a prefix from the regex
361	/// pattern that begins at the start of a haystack only.
362	///
363	/// ```
364	/// use regex_automata::{
365	/// util::{prefilter::Prefilter, syntax},
366	/// MatchKind, Span,
367	/// };
368	///
369	/// let hir = syntax::parse(r"Bruce \w+")?;
370	/// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
371	/// .expect("a prefilter");
372	/// let hay = "Hello Bruce Springsteen!";
373	/// // Nothing is found here because 'Bruce' does
374	/// // not occur at the beginning of our search.
375	/// assert_eq!(
376	/// None,
377	/// pre.prefix(hay.as_bytes(), Span::from(`0`..hay.len())),
378	/// );
379	/// // But if we change where we start the search
380	/// // to begin where 'Bruce ' begins, then a
381	/// // match will be found.
382	/// assert_eq!(
383	/// Some(Span::from(`6`..`12`)),
384	/// pre.prefix(hay.as_bytes(), Span::from(`6`..hay.len())),
385	/// );
386	///
387	/// # Ok::<(), Box<dyn std::error::Error>>(())
388	/// ```
389	#[inline]
390	pub fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
391	#[cfg(not(feature = "alloc"))]
392	{
393	unreachable!()
394	}
395	#[cfg(feature = "alloc")]
396	{
397	self.pre.prefix(haystack, span)
398	}
399	}
400
401	/// Returns the heap memory, in bytes, used by the underlying prefilter.
402	#[inline]
403	pub fn memory_usage(&self) -> usize {
404	#[cfg(not(feature = "alloc"))]
405	{
406	unreachable!()
407	}
408	#[cfg(feature = "alloc")]
409	{
410	self.pre.memory_usage()
411	}
412	}
413
414	/// Implementations might return true here if they believe themselves to
415	/// be "fast." The concept of "fast" is deliberately left vague, but in
416	/// practice this usually corresponds to whether it's believed that SIMD
417	/// will be used.
418	///
419	/// Why do we care about this? Well, some prefilter tricks tend to come
420	/// with their own bits of overhead, and so might only make sense if we
421	/// know that a scan will be much* faster than the regex engine itself.*
422	/// Otherwise, the trick may not be worth doing. Whether something is
423	/// "much" faster than the regex engine generally boils down to whether
424	/// SIMD is used. (But not always. Even a SIMD matcher with a high false
425	/// positive rate can become quite slow.)
426	///
427	/// Even if this returns true, it is still possible for the prefilter to
428	/// be "slow." Remember, prefilters are just heuristics. We can't really
429	/// know* a prefilter will be fast without actually trying the prefilter.*
430	/// (Which of course we cannot afford to do.)
431	#[inline]
432	pub(crate) fn is_fast(&self) -> bool {
433	#[cfg(not(feature = "alloc"))]
434	{
435	unreachable!()
436	}
437	#[cfg(feature = "alloc")]
438	{
439	self.is_fast
440	}
441	}
442	}
443
444	/// A trait for abstracting over prefilters. Basically, a prefilter is
445	/// something that do an unanchored and* an anchored search in a haystack*
446	/// within a given span.
447	///
448	/// This exists pretty much only so that we can use prefilters as a trait
449	/// object (which is what `Prefilter` is). If we ever move off of trait objects
450	/// and to an enum, then it's likely this trait could be removed.
451	pub(crate) trait PrefilterI:
452	Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
453	{
454	/// Run this prefilter on `haystack[span.start..end]` and return a matching
455	/// span if one exists.
456	///
457	/// The span returned is guaranteed to have a start position greater than
458	/// or equal to the one given, and an end position less than or equal to
459	/// the one given.
460	fn find(&self, haystack: &[u8], span: Span) -> Option<Span>;
461
462	/// Returns the span of a prefix of `haystack[span.start..span.end]` if
463	/// the prefilter matches.
464	///
465	/// The span returned is guaranteed to have a start position equivalent to
466	/// the one given, and an end position less than or equal to the one given.
467	fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span>;
468
469	/// Returns the heap memory, in bytes, used by the underlying prefilter.
470	fn memory_usage(&self) -> usize;
471
472	/// Implementations might return true here if they believe themselves to
473	/// be "fast." See [`Prefilter::is_fast`] for more details.
474	fn is_fast(&self) -> bool;
475	}
476
477	#[cfg(feature = "alloc")]
478	impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P> {
479	#[cfg_attr(feature = "perf-inline", inline(always))]
480	fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
481	(&**self).find(haystack, span)
482	}
483
484	#[cfg_attr(feature = "perf-inline", inline(always))]
485	fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
486	(&**self).prefix(haystack, span)
487	}
488
489	#[cfg_attr(feature = "perf-inline", inline(always))]
490	fn memory_usage(&self) -> usize {
491	(&**self).memory_usage()
492	}
493
494	#[cfg_attr(feature = "perf-inline", inline(always))]
495	fn is_fast(&self) -> bool {
496	(&**self).is_fast()
497	}
498	}
499
500	/// A type that encapsulates the selection of a prefilter algorithm from a
501	/// sequence of needles.
502	///
503	/// The existence of this type is a little tricky, because we don't (currently)
504	/// use it for performing a search. Instead, we really only consume it by
505	/// converting the underlying prefilter into a trait object, whether that be
506	/// `dyn PrefilterI` or `dyn Strategy` (for the meta regex engine). In order
507	/// to avoid re-copying the prefilter selection logic, we isolate it here, and
508	/// then force anything downstream that wants to convert it to a trait object
509	/// to do trivial case analysis on it.
510	///
511	/// One wonders whether we should* use an enum instead of a trait object.*
512	/// At time of writing, I chose trait objects based on instinct because 1) I
513	/// knew I wasn't going to inline anything and 2) there would potentially be
514	/// many different choices. However, as of time of writing, I haven't actually
515	/// compared the trait object approach to the enum approach. That probably
516	/// should be litigated, but I ran out of steam.
517	///
518	/// Note that if the `alloc` feature is disabled, then values of this type
519	/// are (and should) never be constructed. Also, in practice, for any of the
520	/// prefilters to be selected, you'll need at least one of the `perf-literal-`*
521	/// features enabled.
522	#[derive(Clone, Debug)]
523	pub(crate) enum Choice {
524	Memchr(Memchr),
525	Memchr2(Memchr2),
526	Memchr3(Memchr3),
527	Memmem(Memmem),
528	Teddy(Teddy),
529	ByteSet(ByteSet),
530	AhoCorasick(AhoCorasick),
531	}
532
533	impl Choice {
534	/// Select what is believed to be the best prefilter algorithm for the
535	/// match semantics and sequence of needles given.
536	///
537	/// This selection algorithm uses the needles as given without any
538	/// modification. For example, if `[bar]` is given, then this doesn't
539	/// try to select `memchr` for `b`. Instead, it would select `memmem`
540	/// for `bar`. If callers would want `memchr` selected for `[bar]`, then
541	/// callers should massages the literals themselves. That is, callers are
542	/// responsible for heuristics surrounding which sequence of literals is
543	/// best.
544	///
545	/// What this selection algorithm does is attempt to use the fastest
546	/// prefilter that works for the literals given. So if `[a, b]`, is given,
547	/// then `memchr2` is selected.
548	///
549	/// Of course, which prefilter is selected is also subject to what
550	/// is available. For example, if `alloc` isn't enabled, then
551	/// that limits which prefilters can be selected. Similarly, if
552	/// `perf-literal-substring` isn't enabled, then nothing from the `memchr`
553	/// crate can be returned.
554	pub(crate) fn new<B: AsRef<[u8]>>(
555	kind: MatchKind,
556	needles: &[B],
557	) -> Option<Choice> {
558	// An empty set means the regex matches nothing, so no sense in
559	// building a prefilter.
560	if needles.len() == `0` {
561	debug!("prefilter building failed: found empty set of literals");
562	return None;
563	}
564	// If the regex can match the empty string, then the prefilter
565	// will by definition match at every position. This is obviously
566	// completely ineffective.
567	if needles.iter().any(\|n\| n.as_ref().is_empty()) {
568	debug!("prefilter building failed: literals match empty string");
569	return None;
570	}
571	// BREADCRUMBS: Perhaps the literal optimizer should special case
572	// sequences of length two or three if the leading bytes of each are
573	// "rare"? Or perhaps, if there are two or three total possible leading
574	// bytes, regardless of the number of literals, and all are rare...
575	// Then well, perhaps we should use memchr2 or memchr3 in those cases?
576	if let Some(pre) = Memchr::new(kind, needles) {
577	debug!("prefilter built: memchr");
578	return Some(Choice::Memchr(pre));
579	}
580	if let Some(pre) = Memchr2::new(kind, needles) {
581	debug!("prefilter built: memchr2");
582	return Some(Choice::Memchr2(pre));
583	}
584	if let Some(pre) = Memchr3::new(kind, needles) {
585	debug!("prefilter built: memchr3");
586	return Some(Choice::Memchr3(pre));
587	}
588	if let Some(pre) = Memmem::new(kind, needles) {
589	debug!("prefilter built: memmem");
590	return Some(Choice::Memmem(pre));
591	}
592	if let Some(pre) = Teddy::new(kind, needles) {
593	debug!("prefilter built: teddy");
594	return Some(Choice::Teddy(pre));
595	}
596	if let Some(pre) = ByteSet::new(kind, needles) {
597	debug!("prefilter built: byteset");
598	return Some(Choice::ByteSet(pre));
599	}
600	if let Some(pre) = AhoCorasick::new(kind, needles) {
601	debug!("prefilter built: aho-corasick");
602	return Some(Choice::AhoCorasick(pre));
603	}
604	debug!("prefilter building failed: no strategy could be found");
605	None
606	}
607	}
608
609	/// Extracts all of the prefix literals from the given HIR expressions into a
610	/// single `Seq`. The literals in the sequence are ordered with respect to the
611	/// order of the given HIR expressions and consistent with the match semantics
612	/// given.
613	///
614	/// The sequence returned is "optimized." That is, they may be shrunk or even
615	/// truncated according to heuristics with the intent of making them more
616	/// useful as a prefilter. (Which translates to both using faster algorithms
617	/// and minimizing the false positive rate.)
618	///
619	/// Note that this erases any connection between the literals and which pattern
620	/// (or patterns) they came from.
621	///
622	/// The match kind given must correspond to the match semantics of the regex
623	/// that is represented by the HIRs given. The match semantics may change the
624	/// literal sequence returned.
625	#[cfg(feature = "syntax")]
626	pub(crate) fn prefixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq
627	where
628	H: core::borrow::Borrow<Hir>,
629	{
630	let mut extractor = literal::Extractor::new();
631	extractor.kind(literal::ExtractKind::Prefix);
632
633	let mut prefixes = literal::Seq::empty();
634	for hir in hirs {
635	prefixes.union(&mut extractor.extract(hir.borrow()));
636	}
637	debug!(
638	"prefixes (len={:?}, exact={:?}) extracted before optimization: {:?}",
639	prefixes.len(),
640	prefixes.is_exact(),
641	prefixes
642	);
643	match kind {
644	MatchKind::All => {
645	prefixes.sort();
646	prefixes.dedup();
647	}
648	MatchKind::LeftmostFirst => {
649	prefixes.optimize_for_prefix_by_preference();
650	}
651	}
652	debug!(
653	"prefixes (len={:?}, exact={:?}) extracted after optimization: {:?}",
654	prefixes.len(),
655	prefixes.is_exact(),
656	prefixes
657	);
658	prefixes
659	}
660
661	/// Like `prefixes`, but for all suffixes of all matches for the given HIRs.
662	#[cfg(feature = "syntax")]
663	pub(crate) fn suffixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq
664	where
665	H: core::borrow::Borrow<Hir>,
666	{
667	let mut extractor = literal::Extractor::new();
668	extractor.kind(literal::ExtractKind::Suffix);
669
670	let mut suffixes = literal::Seq::empty();
671	for hir in hirs {
672	suffixes.union(&mut extractor.extract(hir.borrow()));
673	}
674	debug!(
675	"suffixes (len={:?}, exact={:?}) extracted before optimization: {:?}",
676	suffixes.len(),
677	suffixes.is_exact(),
678	suffixes
679	);
680	match kind {
681	MatchKind::All => {
682	suffixes.sort();
683	suffixes.dedup();
684	}
685	MatchKind::LeftmostFirst => {
686	suffixes.optimize_for_suffix_by_preference();
687	}
688	}
689	debug!(
690	"suffixes (len={:?}, exact={:?}) extracted after optimization: {:?}",
691	suffixes.len(),
692	suffixes.is_exact(),
693	suffixes
694	);
695	suffixes
696	}
697

Provided by KDAB

Definitions