1 | use core::{ |
2 | borrow::Borrow, |
3 | panic::{RefUnwindSafe, UnwindSafe}, |
4 | }; |
5 | |
6 | use alloc::{boxed::Box, sync::Arc, vec, vec::Vec}; |
7 | |
8 | use regex_syntax::{ |
9 | ast, |
10 | hir::{self, Hir}, |
11 | }; |
12 | |
13 | use crate::{ |
14 | meta::{ |
15 | error::BuildError, |
16 | strategy::{self, Strategy}, |
17 | wrappers, |
18 | }, |
19 | nfa::thompson::WhichCaptures, |
20 | util::{ |
21 | captures::{Captures, GroupInfo}, |
22 | iter, |
23 | pool::{Pool, PoolGuard}, |
24 | prefilter::Prefilter, |
25 | primitives::{NonMaxUsize, PatternID}, |
26 | search::{HalfMatch, Input, Match, MatchKind, PatternSet, Span}, |
27 | }, |
28 | }; |
29 | |
30 | /// A type alias for our pool of meta::Cache that fixes the type parameters to |
31 | /// what we use for the meta regex below. |
32 | type CachePool = Pool<Cache, CachePoolFn>; |
33 | |
34 | /// Same as above, but for the guard returned by a pool. |
35 | type CachePoolGuard<'a> = PoolGuard<'a, Cache, CachePoolFn>; |
36 | |
37 | /// The type of the closure we use to create new caches. We need to spell out |
38 | /// all of the marker traits or else we risk leaking !MARKER impls. |
39 | type CachePoolFn = |
40 | Box<dyn Fn() -> Cache + Send + Sync + UnwindSafe + RefUnwindSafe>; |
41 | |
42 | /// A regex matcher that works by composing several other regex matchers |
43 | /// automatically. |
44 | /// |
45 | /// In effect, a meta regex papers over a lot of the quirks or performance |
46 | /// problems in each of the regex engines in this crate. Its goal is to provide |
47 | /// an infallible and simple API that "just does the right thing" in the common |
48 | /// case. |
49 | /// |
50 | /// A meta regex is the implementation of a `Regex` in the `regex` crate. |
51 | /// Indeed, the `regex` crate API is essentially just a light wrapper over |
52 | /// this type. This includes the `regex` crate's `RegexSet` API! |
53 | /// |
54 | /// # Composition |
55 | /// |
56 | /// This is called a "meta" matcher precisely because it uses other regex |
57 | /// matchers to provide a convenient high level regex API. Here are some |
58 | /// examples of how other regex matchers are composed: |
59 | /// |
60 | /// * When calling [`Regex::captures`], instead of immediately |
61 | /// running a slower but more capable regex engine like the |
62 | /// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM), the meta regex engine |
63 | /// will usually first look for the bounds of a match with a higher throughput |
64 | /// regex engine like a [lazy DFA](crate::hybrid). Only when a match is found |
65 | /// is a slower engine like `PikeVM` used to find the matching span for each |
66 | /// capture group. |
/// * While higher throughput engines like the lazy DFA cannot handle
/// Unicode word boundaries in general, they can still be used on pure ASCII
/// haystacks by pretending that Unicode word boundaries are just plain ASCII
/// word boundaries. However, if a haystack is not ASCII, the meta regex engine
/// will automatically switch to a (possibly slower) regex engine that supports
/// Unicode word boundaries in general. (See the example just after this list.)
73 | /// * In some cases where a regex pattern is just a simple literal or a small |
74 | /// set of literals, an actual regex engine won't be used at all. Instead, |
75 | /// substring or multi-substring search algorithms will be employed. |
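///
/// For example, here is a small illustration of the Unicode word boundary
/// point above. Which engine actually runs is an internal detail that isn't
/// observable through this API, so this sketch only shows the observable
/// behavior: the same pattern works on both ASCII and non-ASCII haystacks.
///
/// ```
/// use regex_automata::{meta::Regex, Match};
///
/// let re = Regex::new(r"\b\w+\b")?;
/// // An all-ASCII haystack can be handled by the fastest engines...
/// assert_eq!(Some(Match::must(0, 0..5)), re.find("hello world"));
/// // ...while a non-ASCII haystack makes the meta regex fall back to an
/// // engine that supports Unicode word boundaries.
/// assert_eq!(Some(Match::must(0, 0..4)), re.find("δφ abc"));
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```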
76 | /// |
77 | /// There are many other forms of composition happening too, but the above |
78 | /// should give a general idea. In particular, it may perhaps be surprising |
79 | /// that *multiple* regex engines might get executed for a single search. That |
80 | /// is, the decision of what regex engine to use is not _just_ based on the |
81 | /// pattern, but also based on the dynamic execution of the search itself. |
82 | /// |
83 | /// The primary reason for this composition is performance. The fundamental |
84 | /// tension is that the faster engines tend to be less capable, and the more |
85 | /// capable engines tend to be slower. |
86 | /// |
87 | /// Note that the forms of composition that are allowed are determined by |
88 | /// compile time crate features and configuration. For example, if the `hybrid` |
89 | /// feature isn't enabled, or if [`Config::hybrid`] has been disabled, then the |
90 | /// meta regex engine will never use a lazy DFA. |
91 | /// |
92 | /// # Synchronization and cloning |
93 | /// |
94 | /// Most of the regex engines in this crate require some kind of mutable |
95 | /// "scratch" space to read and write from while performing a search. Since |
96 | /// a meta regex composes these regex engines, a meta regex also requires |
97 | /// mutable scratch space. This scratch space is called a [`Cache`]. |
98 | /// |
99 | /// Most regex engines _also_ usually have a read-only component, typically |
100 | /// a [Thompson `NFA`](crate::nfa::thompson::NFA). |
101 | /// |
102 | /// In order to make the `Regex` API convenient, most of the routines hide |
103 | /// the fact that a `Cache` is needed at all. To achieve this, a [memory |
104 | /// pool](crate::util::pool::Pool) is used internally to retrieve `Cache` |
105 | /// values in a thread safe way that also permits reuse. This in turn implies |
106 | /// that every such search call requires some form of synchronization. Usually |
107 | /// this synchronization is fast enough to not notice, but in some cases, it |
108 | /// can be a bottleneck. This typically occurs when all of the following are |
109 | /// true: |
110 | /// |
111 | /// * The same `Regex` is shared across multiple threads simultaneously, |
112 | /// usually via a [`util::lazy::Lazy`](crate::util::lazy::Lazy) or something |
113 | /// similar from the `once_cell` or `lazy_static` crates. |
114 | /// * The primary unit of work in each thread is a regex search. |
115 | /// * Searches are run on very short haystacks. |
116 | /// |
117 | /// This particular case can lead to high contention on the pool used by a |
118 | /// `Regex` internally, which can in turn increase latency to a noticeable |
119 | /// effect. This cost can be mitigated in one of the following ways: |
120 | /// |
121 | /// * Use a distinct copy of a `Regex` in each thread, usually by cloning it. |
122 | /// Cloning a `Regex` _does not_ do a deep copy of its read-only component. |
123 | /// But it does lead to each `Regex` having its own memory pool, which in |
124 | /// turn eliminates the problem of contention. In general, this technique should |
125 | /// not result in any additional memory usage when compared to sharing the same |
126 | /// `Regex` across multiple threads simultaneously. |
127 | /// * Use lower level APIs, like [`Regex::search_with`], which permit passing |
128 | /// a `Cache` explicitly. In this case, it is up to you to determine how best |
129 | /// to provide a `Cache`. For example, you might put a `Cache` in thread-local |
130 | /// storage if your use case allows for it. |
131 | /// |
132 | /// Overall, this is an issue that happens rarely in practice, but it can |
133 | /// happen. |
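///
/// As a sketch of the first mitigation above, giving each thread its own
/// clone of a `Regex` (remember, cloning does not deep copy the read-only
/// internals) avoids any contention on the internal pool:
///
/// ```
/// use regex_automata::meta::Regex;
///
/// let re = Regex::new(r"\w+")?;
/// let handles: Vec<_> = (0..4)
///     .map(|_| {
///         // Each thread gets its own `Regex` and therefore its own pool.
///         let re = re.clone();
///         std::thread::spawn(move || re.is_match("hello world"))
///     })
///     .collect();
/// for handle in handles {
///     assert!(handle.join().unwrap());
/// }
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```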
134 | /// |
135 | /// # Warning: spin-locks may be used in alloc-only mode |
136 | /// |
137 | /// When this crate is built without the `std` feature and the high level APIs |
138 | /// on a `Regex` are used, then a spin-lock will be used to synchronize access |
139 | /// to an internal pool of `Cache` values. This may be undesirable because |
140 | /// a spin-lock is [effectively impossible to implement correctly in user |
141 | /// space][spinlocks-are-bad]. That is, more concretely, the spin-lock could |
142 | /// result in a deadlock. |
143 | /// |
144 | /// [spinlocks-are-bad]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html |
145 | /// |
146 | /// If one wants to avoid the use of spin-locks when the `std` feature is |
147 | /// disabled, then you must use APIs that accept a `Cache` value explicitly. |
148 | /// For example, [`Regex::search_with`]. |
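///
/// For example, this is a minimal sketch of a search that manages its own
/// [`Cache`] and thereby never touches the internal pool (and thus avoids
/// its synchronization entirely):
///
/// ```
/// use regex_automata::{meta::Regex, Input, Match};
///
/// let re = Regex::new(r"[0-9]{4}")?;
/// // The caller owns the cache and decides where it lives, e.g., in
/// // thread local storage.
/// let mut cache = re.create_cache();
/// let input = Input::new("the year 2010");
/// assert_eq!(Some(Match::must(0, 9..13)), re.search_with(&mut cache, &input));
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```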
149 | /// |
150 | /// # Example |
151 | /// |
152 | /// ``` |
153 | /// use regex_automata::meta::Regex; |
154 | /// |
/// let re = Regex::new(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$")?;
/// assert!(re.is_match("2010-03-14"));
157 | /// |
158 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
159 | /// ``` |
160 | /// |
161 | /// # Example: anchored search |
162 | /// |
163 | /// This example shows how to use [`Input::anchored`] to run an anchored |
164 | /// search, even when the regex pattern itself isn't anchored. An anchored |
165 | /// search guarantees that if a match is found, then the start offset of the |
166 | /// match corresponds to the offset at which the search was started. |
167 | /// |
168 | /// ``` |
169 | /// use regex_automata::{meta::Regex, Anchored, Input, Match}; |
170 | /// |
171 | /// let re = Regex::new(r"\bfoo\b" )?; |
172 | /// let input = Input::new("xx foo xx" ).range(3..).anchored(Anchored::Yes); |
173 | /// // The offsets are in terms of the original haystack. |
174 | /// assert_eq!(Some(Match::must(0, 3..6)), re.find(input)); |
175 | /// |
176 | /// // Notice that no match occurs here, because \b still takes the |
177 | /// // surrounding context into account, even if it means looking back |
178 | /// // before the start of your search. |
179 | /// let hay = "xxfoo xx" ; |
180 | /// let input = Input::new(hay).range(2..).anchored(Anchored::Yes); |
181 | /// assert_eq!(None, re.find(input)); |
182 | /// // Indeed, you cannot achieve the above by simply slicing the |
183 | /// // haystack itself, since the regex engine can't see the |
184 | /// // surrounding context. This is why 'Input' permits setting |
185 | /// // the bounds of a search! |
186 | /// let input = Input::new(&hay[2..]).anchored(Anchored::Yes); |
187 | /// // WRONG! |
188 | /// assert_eq!(Some(Match::must(0, 0..3)), re.find(input)); |
189 | /// |
190 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
191 | /// ``` |
192 | /// |
193 | /// # Example: earliest search |
194 | /// |
195 | /// This example shows how to use [`Input::earliest`] to run a search that |
196 | /// might stop before finding the typical leftmost match. |
197 | /// |
198 | /// ``` |
199 | /// use regex_automata::{meta::Regex, Anchored, Input, Match}; |
200 | /// |
201 | /// let re = Regex::new(r"[a-z]{3}|b" )?; |
202 | /// let input = Input::new("abc" ).earliest(true); |
203 | /// assert_eq!(Some(Match::must(0, 1..2)), re.find(input)); |
204 | /// |
205 | /// // Note that "earliest" isn't really a match semantic unto itself. |
206 | /// // Instead, it is merely an instruction to whatever regex engine |
207 | /// // gets used internally to quit as soon as it can. For example, |
208 | /// // this regex uses a different search technique, and winds up |
209 | /// // producing a different (but valid) match! |
210 | /// let re = Regex::new(r"abc|b" )?; |
211 | /// let input = Input::new("abc" ).earliest(true); |
212 | /// assert_eq!(Some(Match::must(0, 0..3)), re.find(input)); |
213 | /// |
214 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
215 | /// ``` |
216 | /// |
217 | /// # Example: change the line terminator |
218 | /// |
219 | /// This example shows how to enable multi-line mode by default and change |
220 | /// the line terminator to the NUL byte: |
221 | /// |
222 | /// ``` |
223 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
224 | /// |
/// let re = Regex::builder()
///     .syntax(syntax::Config::new().multi_line(true))
///     .configure(Regex::config().line_terminator(b'\x00'))
///     .build(r"^foo$")?;
/// let hay = "\x00foo\x00";
/// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay));
231 | /// |
232 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
233 | /// ``` |
234 | #[derive (Debug)] |
235 | pub struct Regex { |
236 | /// The actual regex implementation. |
237 | imp: Arc<RegexI>, |
238 | /// A thread safe pool of caches. |
239 | /// |
240 | /// For the higher level search APIs, a `Cache` is automatically plucked |
241 | /// from this pool before running a search. The lower level `with` methods |
242 | /// permit the caller to provide their own cache, thereby bypassing |
243 | /// accesses to this pool. |
244 | /// |
245 | /// Note that we put this outside the `Arc` so that cloning a `Regex` |
246 | /// results in creating a fresh `CachePool`. This in turn permits callers |
247 | /// to clone regexes into separate threads where each such regex gets |
248 | /// the pool's "thread owner" optimization. Otherwise, if one shares the |
249 | /// `Regex` directly, then the pool will go through a slower mutex path for |
250 | /// all threads except for the "owner." |
251 | pool: CachePool, |
252 | } |
253 | |
254 | /// The internal implementation of `Regex`, split out so that it can be wrapped |
255 | /// in an `Arc`. |
256 | #[derive (Debug)] |
257 | struct RegexI { |
258 | /// The core matching engine. |
259 | /// |
260 | /// Why is this reference counted when RegexI is already wrapped in an Arc? |
261 | /// Well, we need to capture this in a closure to our `Pool` below in order |
262 | /// to create new `Cache` values when needed. So since it needs to be in |
263 | /// two places, we make it reference counted. |
264 | /// |
265 | /// We make `RegexI` itself reference counted too so that `Regex` itself |
266 | /// stays extremely small and very cheap to clone. |
267 | strat: Arc<dyn Strategy>, |
268 | /// Metadata about the regexes driving the strategy. The metadata is also |
269 | /// usually stored inside the strategy too, but we put it here as well |
270 | /// so that we can get quick access to it (without virtual calls) before |
271 | /// executing the regex engine. For example, we use this metadata to |
272 | /// detect a subset of cases where we know a match is impossible, and can |
273 | /// thus avoid calling into the strategy at all. |
274 | /// |
275 | /// Since `RegexInfo` is stored in multiple places, it is also reference |
276 | /// counted. |
277 | info: RegexInfo, |
278 | } |
279 | |
280 | /// Convenience constructors for a `Regex` using the default configuration. |
281 | impl Regex { |
282 | /// Builds a `Regex` from a single pattern string using the default |
283 | /// configuration. |
284 | /// |
285 | /// If there was a problem parsing the pattern or a problem turning it into |
286 | /// a regex matcher, then an error is returned. |
287 | /// |
288 | /// If you want to change the configuration of a `Regex`, use a [`Builder`] |
289 | /// with a [`Config`]. |
290 | /// |
291 | /// # Example |
292 | /// |
293 | /// ``` |
294 | /// use regex_automata::{meta::Regex, Match}; |
295 | /// |
296 | /// let re = Regex::new(r"(?Rm)^foo$" )?; |
/// let hay = "\r\nfoo\r\n";
298 | /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay)); |
299 | /// |
300 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
301 | /// ``` |
302 | pub fn new(pattern: &str) -> Result<Regex, BuildError> { |
303 | Regex::builder().build(pattern) |
304 | } |
305 | |
306 | /// Builds a `Regex` from many pattern strings using the default |
307 | /// configuration. |
308 | /// |
309 | /// If there was a problem parsing any of the patterns or a problem turning |
310 | /// them into a regex matcher, then an error is returned. |
311 | /// |
312 | /// If you want to change the configuration of a `Regex`, use a [`Builder`] |
313 | /// with a [`Config`]. |
314 | /// |
315 | /// # Example: simple lexer |
316 | /// |
317 | /// This simplistic example leverages the multi-pattern support to build a |
318 | /// simple little lexer. The pattern ID in the match tells you which regex |
319 | /// matched, which in turn might be used to map back to the "type" of the |
320 | /// token returned by the lexer. |
321 | /// |
322 | /// ``` |
323 | /// use regex_automata::{meta::Regex, Match}; |
324 | /// |
325 | /// let re = Regex::new_many(&[ |
326 | /// r"[[:space:]]" , |
327 | /// r"[A-Za-z0-9][A-Za-z0-9_]+" , |
328 | /// r"->" , |
329 | /// r"." , |
330 | /// ])?; |
331 | /// let haystack = "fn is_boss(bruce: i32, springsteen: String) -> bool;" ; |
332 | /// let matches: Vec<Match> = re.find_iter(haystack).collect(); |
333 | /// assert_eq!(matches, vec![ |
334 | /// Match::must(1, 0..2), // 'fn' |
335 | /// Match::must(0, 2..3), // ' ' |
336 | /// Match::must(1, 3..10), // 'is_boss' |
337 | /// Match::must(3, 10..11), // '(' |
338 | /// Match::must(1, 11..16), // 'bruce' |
339 | /// Match::must(3, 16..17), // ':' |
340 | /// Match::must(0, 17..18), // ' ' |
341 | /// Match::must(1, 18..21), // 'i32' |
342 | /// Match::must(3, 21..22), // ',' |
343 | /// Match::must(0, 22..23), // ' ' |
344 | /// Match::must(1, 23..34), // 'springsteen' |
345 | /// Match::must(3, 34..35), // ':' |
346 | /// Match::must(0, 35..36), // ' ' |
347 | /// Match::must(1, 36..42), // 'String' |
348 | /// Match::must(3, 42..43), // ')' |
349 | /// Match::must(0, 43..44), // ' ' |
350 | /// Match::must(2, 44..46), // '->' |
351 | /// Match::must(0, 46..47), // ' ' |
352 | /// Match::must(1, 47..51), // 'bool' |
353 | /// Match::must(3, 51..52), // ';' |
354 | /// ]); |
355 | /// |
356 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
357 | /// ``` |
358 | /// |
359 | /// One can write a lexer like the above using a regex like |
360 | /// `(?P<space>[[:space:]])|(?P<ident>[A-Za-z0-9][A-Za-z0-9_]+)|...`, |
/// but then you need to ask which capture group matched to determine
362 | /// which branch in the regex matched, and thus, which token the match |
363 | /// corresponds to. In contrast, the above example includes the pattern ID |
364 | /// in the match. There's no need to use capture groups at all. |
365 | /// |
366 | /// # Example: finding the pattern that caused an error |
367 | /// |
368 | /// When a syntax error occurs, it is possible to ask which pattern |
369 | /// caused the syntax error. |
370 | /// |
371 | /// ``` |
372 | /// use regex_automata::{meta::Regex, PatternID}; |
373 | /// |
374 | /// let err = Regex::new_many(&["a" , "b" , r"\p{Foo}" , "c" ]).unwrap_err(); |
375 | /// assert_eq!(Some(PatternID::must(2)), err.pattern()); |
376 | /// ``` |
377 | /// |
378 | /// # Example: zero patterns is valid |
379 | /// |
380 | /// Building a regex with zero patterns results in a regex that never |
381 | /// matches anything. Because this routine is generic, passing an empty |
382 | /// slice usually requires a turbo-fish (or something else to help type |
383 | /// inference). |
384 | /// |
385 | /// ``` |
386 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
387 | /// |
388 | /// let re = Regex::new_many::<&str>(&[])?; |
389 | /// assert_eq!(None, re.find("" )); |
390 | /// |
391 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
392 | /// ``` |
393 | pub fn new_many<P: AsRef<str>>( |
394 | patterns: &[P], |
395 | ) -> Result<Regex, BuildError> { |
396 | Regex::builder().build_many(patterns) |
397 | } |
398 | |
399 | /// Return a default configuration for a `Regex`. |
400 | /// |
401 | /// This is a convenience routine to avoid needing to import the [`Config`] |
402 | /// type when customizing the construction of a `Regex`. |
403 | /// |
404 | /// # Example: lower the NFA size limit |
405 | /// |
406 | /// In some cases, the default size limit might be too big. The size limit |
407 | /// can be lowered, which will prevent large regex patterns from compiling. |
408 | /// |
409 | /// ``` |
410 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
411 | /// use regex_automata::meta::Regex; |
412 | /// |
413 | /// let result = Regex::builder() |
414 | /// .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10)))) |
415 | /// // Not even 20KB is enough to build a single large Unicode class! |
416 | /// .build(r"\pL" ); |
417 | /// assert!(result.is_err()); |
418 | /// |
419 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
420 | /// ``` |
421 | pub fn config() -> Config { |
422 | Config::new() |
423 | } |
424 | |
425 | /// Return a builder for configuring the construction of a `Regex`. |
426 | /// |
427 | /// This is a convenience routine to avoid needing to import the |
428 | /// [`Builder`] type in common cases. |
429 | /// |
430 | /// # Example: change the line terminator |
431 | /// |
432 | /// This example shows how to enable multi-line mode by default and change |
433 | /// the line terminator to the NUL byte: |
434 | /// |
435 | /// ``` |
436 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
437 | /// |
/// let re = Regex::builder()
///     .syntax(syntax::Config::new().multi_line(true))
///     .configure(Regex::config().line_terminator(b'\x00'))
///     .build(r"^foo$")?;
/// let hay = "\x00foo\x00";
/// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay));
444 | /// |
445 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
446 | /// ``` |
447 | pub fn builder() -> Builder { |
448 | Builder::new() |
449 | } |
450 | } |
451 | |
452 | /// High level convenience routines for using a regex to search a haystack. |
453 | impl Regex { |
454 | /// Returns true if and only if this regex matches the given haystack. |
455 | /// |
456 | /// This routine may short circuit if it knows that scanning future input |
457 | /// will never lead to a different result. (Consider how this might make |
458 | /// a difference given the regex `a+` on the haystack `aaaaaaaaaaaaaaa`. |
459 | /// This routine _may_ stop after it sees the first `a`, but routines like |
460 | /// `find` need to continue searching because `+` is greedy by default.) |
461 | /// |
462 | /// # Example |
463 | /// |
464 | /// ``` |
465 | /// use regex_automata::meta::Regex; |
466 | /// |
467 | /// let re = Regex::new("foo[0-9]+bar" )?; |
468 | /// |
469 | /// assert!(re.is_match("foo12345bar" )); |
470 | /// assert!(!re.is_match("foobar" )); |
471 | /// |
472 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
473 | /// ``` |
474 | /// |
475 | /// # Example: consistency with search APIs |
476 | /// |
477 | /// `is_match` is guaranteed to return `true` whenever `find` returns a |
478 | /// match. This includes searches that are executed entirely within a |
479 | /// codepoint: |
480 | /// |
481 | /// ``` |
482 | /// use regex_automata::{meta::Regex, Input}; |
483 | /// |
484 | /// let re = Regex::new("a*" )?; |
485 | /// |
486 | /// // This doesn't match because the default configuration bans empty |
487 | /// // matches from splitting a codepoint. |
488 | /// assert!(!re.is_match(Input::new("☃" ).span(1..2))); |
489 | /// assert_eq!(None, re.find(Input::new("☃" ).span(1..2))); |
490 | /// |
491 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
492 | /// ``` |
493 | /// |
494 | /// Notice that when UTF-8 mode is disabled, then the above reports a |
495 | /// match because the restriction against zero-width matches that split a |
496 | /// codepoint has been lifted: |
497 | /// |
498 | /// ``` |
499 | /// use regex_automata::{meta::Regex, Input, Match}; |
500 | /// |
501 | /// let re = Regex::builder() |
502 | /// .configure(Regex::config().utf8_empty(false)) |
503 | /// .build("a*" )?; |
504 | /// |
505 | /// assert!(re.is_match(Input::new("☃" ).span(1..2))); |
506 | /// assert_eq!( |
507 | /// Some(Match::must(0, 1..1)), |
508 | /// re.find(Input::new("☃" ).span(1..2)), |
509 | /// ); |
510 | /// |
511 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
512 | /// ``` |
513 | /// |
514 | /// A similar idea applies when using line anchors with CRLF mode enabled, |
515 | /// which prevents them from matching between a `\r` and a `\n`. |
516 | /// |
517 | /// ``` |
518 | /// use regex_automata::{meta::Regex, Input, Match}; |
519 | /// |
/// let re = Regex::new(r"(?Rm:$)")?;
/// assert!(!re.is_match(Input::new("\r\n").span(1..1)));
522 | /// // A regular line anchor, which only considers \n as a |
523 | /// // line terminator, will match. |
/// let re = Regex::new(r"(?m:$)")?;
/// assert!(re.is_match(Input::new("\r\n").span(1..1)));
526 | /// |
527 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
528 | /// ``` |
529 | #[inline ] |
530 | pub fn is_match<'h, I: Into<Input<'h>>>(&self, input: I) -> bool { |
531 | let input = input.into().earliest(true); |
532 | if self.imp.info.is_impossible(&input) { |
533 | return false; |
534 | } |
535 | let mut guard = self.pool.get(); |
536 | let result = self.imp.strat.is_match(&mut guard, &input); |
537 | // See 'Regex::search' for why we put the guard back explicitly. |
538 | PoolGuard::put(guard); |
539 | result |
540 | } |
541 | |
542 | /// Executes a leftmost search and returns the first match that is found, |
543 | /// if one exists. |
544 | /// |
545 | /// # Example |
546 | /// |
547 | /// ``` |
548 | /// use regex_automata::{meta::Regex, Match}; |
549 | /// |
550 | /// let re = Regex::new("foo[0-9]+" )?; |
551 | /// assert_eq!(Some(Match::must(0, 0..8)), re.find("foo12345" )); |
552 | /// |
553 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
554 | /// ``` |
555 | #[inline ] |
556 | pub fn find<'h, I: Into<Input<'h>>>(&self, input: I) -> Option<Match> { |
557 | self.search(&input.into()) |
558 | } |
559 | |
560 | /// Executes a leftmost forward search and writes the spans of capturing |
561 | /// groups that participated in a match into the provided [`Captures`] |
562 | /// value. If no match was found, then [`Captures::is_match`] is guaranteed |
563 | /// to return `false`. |
564 | /// |
565 | /// # Example |
566 | /// |
567 | /// ``` |
568 | /// use regex_automata::{meta::Regex, Span}; |
569 | /// |
570 | /// let re = Regex::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$" )?; |
571 | /// let mut caps = re.create_captures(); |
572 | /// |
573 | /// re.captures("2010-03-14" , &mut caps); |
574 | /// assert!(caps.is_match()); |
575 | /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1)); |
576 | /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2)); |
577 | /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3)); |
578 | /// |
579 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
580 | /// ``` |
581 | #[inline ] |
582 | pub fn captures<'h, I: Into<Input<'h>>>( |
583 | &self, |
584 | input: I, |
585 | caps: &mut Captures, |
586 | ) { |
587 | self.search_captures(&input.into(), caps) |
588 | } |
589 | |
590 | /// Returns an iterator over all non-overlapping leftmost matches in |
591 | /// the given haystack. If no match exists, then the iterator yields no |
592 | /// elements. |
593 | /// |
594 | /// # Example |
595 | /// |
596 | /// ``` |
597 | /// use regex_automata::{meta::Regex, Match}; |
598 | /// |
599 | /// let re = Regex::new("foo[0-9]+" )?; |
600 | /// let haystack = "foo1 foo12 foo123" ; |
601 | /// let matches: Vec<Match> = re.find_iter(haystack).collect(); |
602 | /// assert_eq!(matches, vec![ |
603 | /// Match::must(0, 0..4), |
604 | /// Match::must(0, 5..10), |
605 | /// Match::must(0, 11..17), |
606 | /// ]); |
607 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
608 | /// ``` |
609 | #[inline ] |
610 | pub fn find_iter<'r, 'h, I: Into<Input<'h>>>( |
611 | &'r self, |
612 | input: I, |
613 | ) -> FindMatches<'r, 'h> { |
614 | let cache = self.pool.get(); |
615 | let it = iter::Searcher::new(input.into()); |
616 | FindMatches { re: self, cache, it } |
617 | } |
618 | |
619 | /// Returns an iterator over all non-overlapping `Captures` values. If no |
620 | /// match exists, then the iterator yields no elements. |
621 | /// |
622 | /// This yields the same matches as [`Regex::find_iter`], but it includes |
623 | /// the spans of all capturing groups that participate in each match. |
624 | /// |
625 | /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for |
626 | /// how to correctly iterate over all matches in a haystack while avoiding |
627 | /// the creation of a new `Captures` value for every match. (Which you are |
628 | /// forced to do with an `Iterator`.) |
629 | /// |
630 | /// # Example |
631 | /// |
632 | /// ``` |
633 | /// use regex_automata::{meta::Regex, Span}; |
634 | /// |
635 | /// let re = Regex::new("foo(?P<numbers>[0-9]+)" )?; |
636 | /// |
637 | /// let haystack = "foo1 foo12 foo123" ; |
638 | /// let matches: Vec<Span> = re |
639 | /// .captures_iter(haystack) |
640 | /// // The unwrap is OK since 'numbers' matches if the pattern matches. |
641 | /// .map(|caps| caps.get_group_by_name("numbers" ).unwrap()) |
642 | /// .collect(); |
643 | /// assert_eq!(matches, vec![ |
644 | /// Span::from(3..4), |
645 | /// Span::from(8..10), |
646 | /// Span::from(14..17), |
647 | /// ]); |
648 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
649 | /// ``` |
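///
/// # Example: amortizing allocations
///
/// As the tip above suggests, one can drop down to
/// [`util::iter::Searcher`](crate::util::iter::Searcher) to reuse a single
/// `Captures` value across matches instead of allocating a new one per
/// match. This is a minimal sketch of that approach:
///
/// ```
/// use regex_automata::{meta::Regex, util::iter::Searcher, Input, Span};
///
/// let re = Regex::new("foo(?P<numbers>[0-9]+)")?;
/// let mut caps = re.create_captures();
///
/// let mut spans: Vec<Span> = vec![];
/// let mut it = Searcher::new(Input::new("foo1 foo12 foo123"));
/// while let Some(_) = it.advance(|input| {
///     re.search_captures(input, &mut caps);
///     Ok(caps.get_match())
/// }) {
///     // The unwrap is OK since 'numbers' matches whenever the pattern does.
///     spans.push(caps.get_group_by_name("numbers").unwrap());
/// }
/// assert_eq!(spans, vec![
///     Span::from(3..4), Span::from(8..10), Span::from(14..17),
/// ]);
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```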
650 | #[inline ] |
651 | pub fn captures_iter<'r, 'h, I: Into<Input<'h>>>( |
652 | &'r self, |
653 | input: I, |
654 | ) -> CapturesMatches<'r, 'h> { |
655 | let cache = self.pool.get(); |
656 | let caps = self.create_captures(); |
657 | let it = iter::Searcher::new(input.into()); |
658 | CapturesMatches { re: self, cache, caps, it } |
659 | } |
660 | |
661 | /// Returns an iterator of spans of the haystack given, delimited by a |
662 | /// match of the regex. Namely, each element of the iterator corresponds to |
663 | /// a part of the haystack that *isn't* matched by the regular expression. |
664 | /// |
665 | /// # Example |
666 | /// |
667 | /// To split a string delimited by arbitrary amounts of spaces or tabs: |
668 | /// |
669 | /// ``` |
670 | /// use regex_automata::meta::Regex; |
671 | /// |
672 | /// let re = Regex::new(r"[ \t]+" )?; |
673 | /// let hay = "a b \t c \td e" ; |
674 | /// let fields: Vec<&str> = re.split(hay).map(|span| &hay[span]).collect(); |
675 | /// assert_eq!(fields, vec!["a" , "b" , "c" , "d" , "e" ]); |
676 | /// |
677 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
678 | /// ``` |
679 | /// |
680 | /// # Example: more cases |
681 | /// |
682 | /// Basic usage: |
683 | /// |
684 | /// ``` |
685 | /// use regex_automata::meta::Regex; |
686 | /// |
687 | /// let re = Regex::new(r" " )?; |
688 | /// let hay = "Mary had a little lamb" ; |
689 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
690 | /// assert_eq!(got, vec!["Mary" , "had" , "a" , "little" , "lamb" ]); |
691 | /// |
692 | /// let re = Regex::new(r"X" )?; |
693 | /// let hay = "" ; |
694 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
695 | /// assert_eq!(got, vec!["" ]); |
696 | /// |
697 | /// let re = Regex::new(r"X" )?; |
698 | /// let hay = "lionXXtigerXleopard" ; |
699 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
700 | /// assert_eq!(got, vec!["lion" , "" , "tiger" , "leopard" ]); |
701 | /// |
702 | /// let re = Regex::new(r"::" )?; |
703 | /// let hay = "lion::tiger::leopard" ; |
704 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
705 | /// assert_eq!(got, vec!["lion" , "tiger" , "leopard" ]); |
706 | /// |
707 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
708 | /// ``` |
709 | /// |
710 | /// If a haystack contains multiple contiguous matches, you will end up |
711 | /// with empty spans yielded by the iterator: |
712 | /// |
713 | /// ``` |
714 | /// use regex_automata::meta::Regex; |
715 | /// |
716 | /// let re = Regex::new(r"X" )?; |
717 | /// let hay = "XXXXaXXbXc" ; |
718 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
719 | /// assert_eq!(got, vec!["" , "" , "" , "" , "a" , "" , "b" , "c" ]); |
720 | /// |
721 | /// let re = Regex::new(r"/" )?; |
722 | /// let hay = "(///)" ; |
723 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
724 | /// assert_eq!(got, vec!["(" , "" , "" , ")" ]); |
725 | /// |
726 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
727 | /// ``` |
728 | /// |
729 | /// Separators at the start or end of a haystack are neighbored by empty |
730 | /// spans. |
731 | /// |
732 | /// ``` |
733 | /// use regex_automata::meta::Regex; |
734 | /// |
735 | /// let re = Regex::new(r"0" )?; |
736 | /// let hay = "010" ; |
737 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
738 | /// assert_eq!(got, vec!["" , "1" , "" ]); |
739 | /// |
740 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
741 | /// ``` |
742 | /// |
743 | /// When the empty string is used as a regex, it splits at every valid |
744 | /// UTF-8 boundary by default (which includes the beginning and end of the |
745 | /// haystack): |
746 | /// |
747 | /// ``` |
748 | /// use regex_automata::meta::Regex; |
749 | /// |
750 | /// let re = Regex::new(r"" )?; |
751 | /// let hay = "rust" ; |
752 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
753 | /// assert_eq!(got, vec!["" , "r" , "u" , "s" , "t" , "" ]); |
754 | /// |
755 | /// // Splitting by an empty string is UTF-8 aware by default! |
756 | /// let re = Regex::new(r"" )?; |
757 | /// let hay = "☃" ; |
758 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
759 | /// assert_eq!(got, vec!["" , "☃" , "" ]); |
760 | /// |
761 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
762 | /// ``` |
763 | /// |
764 | /// But note that UTF-8 mode for empty strings can be disabled, which will |
765 | /// then result in a match at every byte offset in the haystack, |
766 | /// including between every UTF-8 code unit. |
767 | /// |
768 | /// ``` |
769 | /// use regex_automata::meta::Regex; |
770 | /// |
771 | /// let re = Regex::builder() |
772 | /// .configure(Regex::config().utf8_empty(false)) |
773 | /// .build(r"" )?; |
774 | /// let hay = "☃" .as_bytes(); |
775 | /// let got: Vec<&[u8]> = re.split(hay).map(|sp| &hay[sp]).collect(); |
776 | /// assert_eq!(got, vec![ |
777 | /// // Writing byte string slices is just brutal. The problem is that |
778 | /// // b"foo" has type &[u8; 3] instead of &[u8]. |
/// &[][..], &[b'\xE2'][..], &[b'\x98'][..], &[b'\x83'][..], &[][..],
780 | /// ]); |
781 | /// |
782 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
783 | /// ``` |
784 | /// |
/// Contiguous separators (which commonly show up with whitespace) can lead
/// to possibly surprising behavior. For example, this code is correct:
787 | /// |
788 | /// ``` |
789 | /// use regex_automata::meta::Regex; |
790 | /// |
791 | /// let re = Regex::new(r" " )?; |
/// let hay = "    a  b c";
793 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
794 | /// assert_eq!(got, vec!["" , "" , "" , "" , "a" , "" , "b" , "c" ]); |
795 | /// |
796 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
797 | /// ``` |
798 | /// |
799 | /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want |
800 | /// to match contiguous space characters: |
801 | /// |
802 | /// ``` |
803 | /// use regex_automata::meta::Regex; |
804 | /// |
805 | /// let re = Regex::new(r" +" )?; |
/// let hay = "    a  b c";
807 | /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); |
808 | /// // N.B. This does still include a leading empty span because ' +' |
809 | /// // matches at the beginning of the haystack. |
810 | /// assert_eq!(got, vec!["" , "a" , "b" , "c" ]); |
811 | /// |
812 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
813 | /// ``` |
814 | #[inline ] |
815 | pub fn split<'r, 'h, I: Into<Input<'h>>>( |
816 | &'r self, |
817 | input: I, |
818 | ) -> Split<'r, 'h> { |
819 | Split { finder: self.find_iter(input), last: 0 } |
820 | } |
821 | |
822 | /// Returns an iterator of at most `limit` spans of the haystack given, |
823 | /// delimited by a match of the regex. (A `limit` of `0` will return no |
824 | /// spans.) Namely, each element of the iterator corresponds to a part |
825 | /// of the haystack that *isn't* matched by the regular expression. The |
826 | /// remainder of the haystack that is not split will be the last element in |
827 | /// the iterator. |
828 | /// |
829 | /// # Example |
830 | /// |
831 | /// Get the first two words in some haystack: |
832 | /// |
833 | /// ``` |
834 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
835 | /// use regex_automata::meta::Regex; |
836 | /// |
837 | /// let re = Regex::new(r"\W+" ).unwrap(); |
838 | /// let hay = "Hey! How are you?" ; |
839 | /// let fields: Vec<&str> = |
840 | /// re.splitn(hay, 3).map(|span| &hay[span]).collect(); |
841 | /// assert_eq!(fields, vec!["Hey" , "How" , "are you?" ]); |
842 | /// |
843 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
844 | /// ``` |
845 | /// |
846 | /// # Examples: more cases |
847 | /// |
848 | /// ``` |
849 | /// use regex_automata::meta::Regex; |
850 | /// |
851 | /// let re = Regex::new(r" " )?; |
852 | /// let hay = "Mary had a little lamb" ; |
853 | /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect(); |
854 | /// assert_eq!(got, vec!["Mary" , "had" , "a little lamb" ]); |
855 | /// |
856 | /// let re = Regex::new(r"X" )?; |
857 | /// let hay = "" ; |
858 | /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect(); |
859 | /// assert_eq!(got, vec!["" ]); |
860 | /// |
861 | /// let re = Regex::new(r"X" )?; |
862 | /// let hay = "lionXXtigerXleopard" ; |
863 | /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect(); |
864 | /// assert_eq!(got, vec!["lion" , "" , "tigerXleopard" ]); |
865 | /// |
866 | /// let re = Regex::new(r"::" )?; |
867 | /// let hay = "lion::tiger::leopard" ; |
868 | /// let got: Vec<&str> = re.splitn(hay, 2).map(|sp| &hay[sp]).collect(); |
869 | /// assert_eq!(got, vec!["lion" , "tiger::leopard" ]); |
870 | /// |
871 | /// let re = Regex::new(r"X" )?; |
872 | /// let hay = "abcXdef" ; |
873 | /// let got: Vec<&str> = re.splitn(hay, 1).map(|sp| &hay[sp]).collect(); |
874 | /// assert_eq!(got, vec!["abcXdef" ]); |
875 | /// |
876 | /// let re = Regex::new(r"X" )?; |
877 | /// let hay = "abcdef" ; |
878 | /// let got: Vec<&str> = re.splitn(hay, 2).map(|sp| &hay[sp]).collect(); |
879 | /// assert_eq!(got, vec!["abcdef" ]); |
880 | /// |
881 | /// let re = Regex::new(r"X" )?; |
882 | /// let hay = "abcXdef" ; |
883 | /// let got: Vec<&str> = re.splitn(hay, 0).map(|sp| &hay[sp]).collect(); |
884 | /// assert!(got.is_empty()); |
885 | /// |
886 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
887 | /// ``` |
888 | pub fn splitn<'r, 'h, I: Into<Input<'h>>>( |
889 | &'r self, |
890 | input: I, |
891 | limit: usize, |
892 | ) -> SplitN<'r, 'h> { |
893 | SplitN { splits: self.split(input), limit } |
894 | } |
895 | } |
896 | |
897 | /// Lower level search routines that give more control. |
898 | impl Regex { |
899 | /// Returns the start and end offset of the leftmost match. If no match |
900 | /// exists, then `None` is returned. |
901 | /// |
/// This is like [`Regex::find`], but it accepts a concrete `&Input`
/// instead of an `Into<Input>`.
904 | /// |
905 | /// # Example |
906 | /// |
907 | /// ``` |
908 | /// use regex_automata::{meta::Regex, Input, Match}; |
909 | /// |
910 | /// let re = Regex::new(r"Samwise|Sam" )?; |
911 | /// let input = Input::new( |
912 | /// "one of the chief characters, Samwise the Brave" , |
913 | /// ); |
914 | /// assert_eq!(Some(Match::must(0, 29..36)), re.search(&input)); |
915 | /// |
916 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
917 | /// ``` |
918 | #[inline ] |
919 | pub fn search(&self, input: &Input<'_>) -> Option<Match> { |
920 | if self.imp.info.is_impossible(input) { |
921 | return None; |
922 | } |
923 | let mut guard = self.pool.get(); |
924 | let result = self.imp.strat.search(&mut guard, input); |
925 | // We do this dance with the guard and explicitly put it back in the |
926 | // pool because it seems to result in better codegen. If we let the |
927 | // guard's Drop impl put it back in the pool, then functions like |
928 | // ptr::drop_in_place get called and they *don't* get inlined. This |
929 | // isn't usually a big deal, but in latency sensitive benchmarks the |
930 | // extra function call can matter. |
931 | // |
932 | // I used `rebar measure -f '^grep/every-line$' -e meta` to measure |
933 | // the effects here. |
934 | // |
935 | // Note that this doesn't eliminate the latency effects of using the |
936 | // pool. There is still some (minor) cost for the "thread owner" of the |
937 | // pool. (i.e., The thread that first calls a regex search routine.) |
938 | // However, for other threads using the regex, the pool access can be |
939 | // quite expensive as it goes through a mutex. Callers can avoid this |
940 | // by either cloning the Regex (which creates a distinct copy of the |
941 | // pool), or callers can use the lower level APIs that accept a 'Cache' |
942 | // directly and do their own handling. |
943 | PoolGuard::put(guard); |
944 | result |
945 | } |
946 | |
947 | /// Returns the end offset of the leftmost match. If no match exists, then |
948 | /// `None` is returned. |
949 | /// |
950 | /// This is distinct from [`Regex::search`] in that it only returns the end |
951 | /// of a match and not the start of the match. Depending on a variety of |
952 | /// implementation details, this _may_ permit the regex engine to do less |
953 | /// overall work. For example, if a DFA is being used to execute a search, |
954 | /// then the start of a match usually requires running a separate DFA in |
/// reverse to find the start of a match. If one only needs the end of
956 | /// a match, then the separate reverse scan to find the start of a match |
957 | /// can be skipped. (Note that the reverse scan is avoided even when using |
958 | /// `Regex::search` when possible, for example, in the case of an anchored |
959 | /// search.) |
960 | /// |
961 | /// # Example |
962 | /// |
963 | /// ``` |
964 | /// use regex_automata::{meta::Regex, Input, HalfMatch}; |
965 | /// |
966 | /// let re = Regex::new(r"Samwise|Sam" )?; |
967 | /// let input = Input::new( |
968 | /// "one of the chief characters, Samwise the Brave" , |
969 | /// ); |
970 | /// assert_eq!(Some(HalfMatch::must(0, 36)), re.search_half(&input)); |
971 | /// |
972 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
973 | /// ``` |
974 | #[inline ] |
975 | pub fn search_half(&self, input: &Input<'_>) -> Option<HalfMatch> { |
976 | if self.imp.info.is_impossible(input) { |
977 | return None; |
978 | } |
979 | let mut guard = self.pool.get(); |
980 | let result = self.imp.strat.search_half(&mut guard, input); |
981 | // See 'Regex::search' for why we put the guard back explicitly. |
982 | PoolGuard::put(guard); |
983 | result |
984 | } |
985 | |
986 | /// Executes a leftmost forward search and writes the spans of capturing |
987 | /// groups that participated in a match into the provided [`Captures`] |
988 | /// value. If no match was found, then [`Captures::is_match`] is guaranteed |
989 | /// to return `false`. |
990 | /// |
991 | /// This is like [`Regex::captures`], but it accepts a concrete `&Input` |
992 | /// instead of an `Into<Input>`. |
993 | /// |
994 | /// # Example: specific pattern search |
995 | /// |
996 | /// This example shows how to build a multi-pattern `Regex` that permits |
997 | /// searching for specific patterns. |
998 | /// |
999 | /// ``` |
1000 | /// use regex_automata::{ |
1001 | /// meta::Regex, |
1002 | /// Anchored, Match, PatternID, Input, |
1003 | /// }; |
1004 | /// |
1005 | /// let re = Regex::new_many(&["[a-z0-9]{6}" , "[a-z][a-z0-9]{5}" ])?; |
1006 | /// let mut caps = re.create_captures(); |
1007 | /// let haystack = "foo123" ; |
1008 | /// |
1009 | /// // Since we are using the default leftmost-first match and both |
1010 | /// // patterns match at the same starting position, only the first pattern |
1011 | /// // will be returned in this case when doing a search for any of the |
1012 | /// // patterns. |
1013 | /// let expected = Some(Match::must(0, 0..6)); |
1014 | /// re.search_captures(&Input::new(haystack), &mut caps); |
1015 | /// assert_eq!(expected, caps.get_match()); |
1016 | /// |
1017 | /// // But if we want to check whether some other pattern matches, then we |
1018 | /// // can provide its pattern ID. |
1019 | /// let expected = Some(Match::must(1, 0..6)); |
1020 | /// let input = Input::new(haystack) |
1021 | /// .anchored(Anchored::Pattern(PatternID::must(1))); |
1022 | /// re.search_captures(&input, &mut caps); |
1023 | /// assert_eq!(expected, caps.get_match()); |
1024 | /// |
1025 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1026 | /// ``` |
1027 | /// |
1028 | /// # Example: specifying the bounds of a search |
1029 | /// |
1030 | /// This example shows how providing the bounds of a search can produce |
1031 | /// different results than simply sub-slicing the haystack. |
1032 | /// |
1033 | /// ``` |
1034 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
1035 | /// use regex_automata::{meta::Regex, Match, Input}; |
1036 | /// |
1037 | /// let re = Regex::new(r"\b[0-9]{3}\b" )?; |
1038 | /// let mut caps = re.create_captures(); |
1039 | /// let haystack = "foo123bar" ; |
1040 | /// |
1041 | /// // Since we sub-slice the haystack, the search doesn't know about |
1042 | /// // the larger context and assumes that `123` is surrounded by word |
1043 | /// // boundaries. And of course, the match position is reported relative |
1044 | /// // to the sub-slice as well, which means we get `0..3` instead of |
1045 | /// // `3..6`. |
1046 | /// let expected = Some(Match::must(0, 0..3)); |
1047 | /// let input = Input::new(&haystack[3..6]); |
1048 | /// re.search_captures(&input, &mut caps); |
1049 | /// assert_eq!(expected, caps.get_match()); |
1050 | /// |
1051 | /// // But if we provide the bounds of the search within the context of the |
1052 | /// // entire haystack, then the search can take the surrounding context |
1053 | /// // into account. (And if we did find a match, it would be reported |
1054 | /// // as a valid offset into `haystack` instead of its sub-slice.) |
1055 | /// let expected = None; |
1056 | /// let input = Input::new(haystack).range(3..6); |
1057 | /// re.search_captures(&input, &mut caps); |
1058 | /// assert_eq!(expected, caps.get_match()); |
1059 | /// |
1060 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1061 | /// ``` |
1062 | #[inline ] |
1063 | pub fn search_captures(&self, input: &Input<'_>, caps: &mut Captures) { |
1064 | caps.set_pattern(None); |
1065 | let pid = self.search_slots(input, caps.slots_mut()); |
1066 | caps.set_pattern(pid); |
1067 | } |
1068 | |
1069 | /// Executes a leftmost forward search and writes the spans of capturing |
1070 | /// groups that participated in a match into the provided `slots`, and |
1071 | /// returns the matching pattern ID. The contents of the slots for patterns |
1072 | /// other than the matching pattern are unspecified. If no match was found, |
/// then `None` is returned and the contents of `slots` are unspecified.
1074 | /// |
1075 | /// This is like [`Regex::search`], but it accepts a raw slots slice |
1076 | /// instead of a `Captures` value. This is useful in contexts where you |
1077 | /// don't want or need to allocate a `Captures`. |
1078 | /// |
1079 | /// It is legal to pass _any_ number of slots to this routine. If the regex |
1080 | /// engine would otherwise write a slot offset that doesn't fit in the |
1081 | /// provided slice, then it is simply skipped. In general though, there are |
1082 | /// usually three slice lengths you might want to use: |
1083 | /// |
1084 | /// * An empty slice, if you only care about which pattern matched. |
1085 | /// * A slice with [`pattern_len() * 2`](Regex::pattern_len) slots, if you |
1086 | /// only care about the overall match spans for each matching pattern. |
1087 | /// * A slice with |
1088 | /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which |
1089 | /// permits recording match offsets for every capturing group in every |
1090 | /// pattern. |
1091 | /// |
1092 | /// # Example |
1093 | /// |
1094 | /// This example shows how to find the overall match offsets in a |
1095 | /// multi-pattern search without allocating a `Captures` value. Indeed, we |
1096 | /// can put our slots right on the stack. |
1097 | /// |
1098 | /// ``` |
1099 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
1100 | /// use regex_automata::{meta::Regex, PatternID, Input}; |
1101 | /// |
1102 | /// let re = Regex::new_many(&[ |
1103 | /// r"\pL+" , |
1104 | /// r"\d+" , |
1105 | /// ])?; |
1106 | /// let input = Input::new("!@#123" ); |
1107 | /// |
1108 | /// // We only care about the overall match offsets here, so we just |
1109 | /// // allocate two slots for each pattern. Each slot records the start |
1110 | /// // and end of the match. |
1111 | /// let mut slots = [None; 4]; |
1112 | /// let pid = re.search_slots(&input, &mut slots); |
1113 | /// assert_eq!(Some(PatternID::must(1)), pid); |
1114 | /// |
1115 | /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'. |
1116 | /// // See 'GroupInfo' for more details on the mapping between groups and |
1117 | /// // slot indices. |
1118 | /// let slot_start = pid.unwrap().as_usize() * 2; |
1119 | /// let slot_end = slot_start + 1; |
1120 | /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get())); |
1121 | /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get())); |
1122 | /// |
1123 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1124 | /// ``` |
1125 | #[inline ] |
1126 | pub fn search_slots( |
1127 | &self, |
1128 | input: &Input<'_>, |
1129 | slots: &mut [Option<NonMaxUsize>], |
1130 | ) -> Option<PatternID> { |
1131 | if self.imp.info.is_impossible(input) { |
1132 | return None; |
1133 | } |
1134 | let mut guard = self.pool.get(); |
1135 | let result = self.imp.strat.search_slots(&mut guard, input, slots); |
1136 | // See 'Regex::search' for why we put the guard back explicitly. |
1137 | PoolGuard::put(guard); |
1138 | result |
1139 | } |
1140 | |
1141 | /// Writes the set of patterns that match anywhere in the given search |
1142 | /// configuration to `patset`. If multiple patterns match at the same |
1143 | /// position and this `Regex` was configured with [`MatchKind::All`] |
1144 | /// semantics, then all matching patterns are written to the given set. |
1145 | /// |
/// Unless all of the patterns in this `Regex` are anchored, this will
/// generally need to scan the entire haystack.
1148 | /// |
1149 | /// This search routine *does not* clear the pattern set. This gives some |
1150 | /// flexibility to the caller (e.g., running multiple searches with the |
1151 | /// same pattern set), but does make the API bug-prone if you're reusing |
1152 | /// the same pattern set for multiple searches but intended them to be |
1153 | /// independent. |
1154 | /// |
1155 | /// If a pattern ID matched but the given `PatternSet` does not have |
1156 | /// sufficient capacity to store it, then it is not inserted and silently |
1157 | /// dropped. |
1158 | /// |
1159 | /// # Example |
1160 | /// |
1161 | /// This example shows how to find all matching patterns in a haystack, |
1162 | /// even when some patterns match at the same position as other patterns. |
1163 | /// It is important that we configure the `Regex` with [`MatchKind::All`] |
1164 | /// semantics here, or else overlapping matches will not be reported. |
1165 | /// |
1166 | /// ``` |
1167 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
1168 | /// use regex_automata::{meta::Regex, Input, MatchKind, PatternSet}; |
1169 | /// |
1170 | /// let patterns = &[ |
1171 | /// r"\w+" , r"\d+" , r"\pL+" , r"foo" , r"bar" , r"barfoo" , r"foobar" , |
1172 | /// ]; |
1173 | /// let re = Regex::builder() |
1174 | /// .configure(Regex::config().match_kind(MatchKind::All)) |
1175 | /// .build_many(patterns)?; |
1176 | /// |
1177 | /// let input = Input::new("foobar" ); |
1178 | /// let mut patset = PatternSet::new(re.pattern_len()); |
1179 | /// re.which_overlapping_matches(&input, &mut patset); |
1180 | /// let expected = vec![0, 2, 3, 4, 6]; |
1181 | /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect(); |
1182 | /// assert_eq!(expected, got); |
1183 | /// |
1184 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1185 | /// ``` |
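///
/// # Example: the pattern set is not cleared
///
/// As noted above, this routine does not clear the given `PatternSet`. So
/// when reusing one set across searches that should be independent, the
/// caller needs to clear it explicitly. A minimal sketch:
///
/// ```
/// use regex_automata::{meta::Regex, Input, MatchKind, PatternSet};
///
/// let re = Regex::builder()
///     .configure(Regex::config().match_kind(MatchKind::All))
///     .build_many(&[r"[a-z]+", r"[0-9]+"])?;
/// let mut patset = PatternSet::new(re.pattern_len());
///
/// re.which_overlapping_matches(&Input::new("abc"), &mut patset);
/// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
/// assert_eq!(vec![0], got);
///
/// // Clear the set, otherwise the result of the previous search lingers.
/// patset.clear();
/// re.which_overlapping_matches(&Input::new("123"), &mut patset);
/// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
/// assert_eq!(vec![1], got);
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```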
1186 | #[inline ] |
1187 | pub fn which_overlapping_matches( |
1188 | &self, |
1189 | input: &Input<'_>, |
1190 | patset: &mut PatternSet, |
1191 | ) { |
1192 | if self.imp.info.is_impossible(input) { |
1193 | return; |
1194 | } |
1195 | let mut guard = self.pool.get(); |
1196 | let result = self |
1197 | .imp |
1198 | .strat |
1199 | .which_overlapping_matches(&mut guard, input, patset); |
1200 | // See 'Regex::search' for why we put the guard back explicitly. |
1201 | PoolGuard::put(guard); |
1202 | result |
1203 | } |
1204 | } |
1205 | |
1206 | /// Lower level search routines that give more control, and require the caller |
1207 | /// to provide an explicit [`Cache`] parameter. |
1208 | impl Regex { |
1209 | /// This is like [`Regex::search`], but requires the caller to |
1210 | /// explicitly pass a [`Cache`]. |
1211 | /// |
1212 | /// # Why pass a `Cache` explicitly? |
1213 | /// |
1214 | /// Passing a `Cache` explicitly will bypass the use of an internal memory |
1215 | /// pool used by `Regex` to get a `Cache` for a search. The use of this |
1216 | /// pool can be slower in some cases when a `Regex` is used from multiple |
1217 | /// threads simultaneously. Typically, performance only becomes an issue |
1218 | /// when there is heavy contention, which in turn usually only occurs |
1219 | /// when each thread's primary unit of work is a regex search on a small |
1220 | /// haystack. |
1221 | /// |
1222 | /// # Example |
1223 | /// |
1224 | /// ``` |
1225 | /// use regex_automata::{meta::Regex, Input, Match}; |
1226 | /// |
1227 | /// let re = Regex::new(r"Samwise|Sam" )?; |
1228 | /// let mut cache = re.create_cache(); |
1229 | /// let input = Input::new( |
1230 | /// "one of the chief characters, Samwise the Brave" , |
1231 | /// ); |
1232 | /// assert_eq!( |
1233 | /// Some(Match::must(0, 29..36)), |
1234 | /// re.search_with(&mut cache, &input), |
1235 | /// ); |
1236 | /// |
1237 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1238 | /// ``` |
1239 | #[inline ] |
1240 | pub fn search_with( |
1241 | &self, |
1242 | cache: &mut Cache, |
1243 | input: &Input<'_>, |
1244 | ) -> Option<Match> { |
1245 | if self.imp.info.is_impossible(input) { |
1246 | return None; |
1247 | } |
1248 | self.imp.strat.search(cache, input) |
1249 | } |
1250 | |
1251 | /// This is like [`Regex::search_half`], but requires the caller to |
1252 | /// explicitly pass a [`Cache`]. |
1253 | /// |
1254 | /// # Why pass a `Cache` explicitly? |
1255 | /// |
1256 | /// Passing a `Cache` explicitly will bypass the use of an internal memory |
1257 | /// pool used by `Regex` to get a `Cache` for a search. The use of this |
1258 | /// pool can be slower in some cases when a `Regex` is used from multiple |
1259 | /// threads simultaneously. Typically, performance only becomes an issue |
1260 | /// when there is heavy contention, which in turn usually only occurs |
1261 | /// when each thread's primary unit of work is a regex search on a small |
1262 | /// haystack. |
1263 | /// |
1264 | /// # Example |
1265 | /// |
1266 | /// ``` |
1267 | /// use regex_automata::{meta::Regex, Input, HalfMatch}; |
1268 | /// |
1269 | /// let re = Regex::new(r"Samwise|Sam" )?; |
1270 | /// let mut cache = re.create_cache(); |
1271 | /// let input = Input::new( |
1272 | /// "one of the chief characters, Samwise the Brave" , |
1273 | /// ); |
1274 | /// assert_eq!( |
1275 | /// Some(HalfMatch::must(0, 36)), |
1276 | /// re.search_half_with(&mut cache, &input), |
1277 | /// ); |
1278 | /// |
1279 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1280 | /// ``` |
1281 | #[inline ] |
1282 | pub fn search_half_with( |
1283 | &self, |
1284 | cache: &mut Cache, |
1285 | input: &Input<'_>, |
1286 | ) -> Option<HalfMatch> { |
1287 | if self.imp.info.is_impossible(input) { |
1288 | return None; |
1289 | } |
1290 | self.imp.strat.search_half(cache, input) |
1291 | } |
1292 | |
1293 | /// This is like [`Regex::search_captures`], but requires the caller to |
1294 | /// explicitly pass a [`Cache`]. |
1295 | /// |
1296 | /// # Why pass a `Cache` explicitly? |
1297 | /// |
1298 | /// Passing a `Cache` explicitly will bypass the use of an internal memory |
1299 | /// pool used by `Regex` to get a `Cache` for a search. The use of this |
1300 | /// pool can be slower in some cases when a `Regex` is used from multiple |
1301 | /// threads simultaneously. Typically, performance only becomes an issue |
1302 | /// when there is heavy contention, which in turn usually only occurs |
1303 | /// when each thread's primary unit of work is a regex search on a small |
1304 | /// haystack. |
1305 | /// |
1306 | /// # Example: specific pattern search |
1307 | /// |
1308 | /// This example shows how to build a multi-pattern `Regex` that permits |
1309 | /// searching for specific patterns. |
1310 | /// |
1311 | /// ``` |
1312 | /// use regex_automata::{ |
1313 | /// meta::Regex, |
1314 | /// Anchored, Match, PatternID, Input, |
1315 | /// }; |
1316 | /// |
1317 | /// let re = Regex::new_many(&["[a-z0-9]{6}" , "[a-z][a-z0-9]{5}" ])?; |
1318 | /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); |
1319 | /// let haystack = "foo123" ; |
1320 | /// |
1321 | /// // Since we are using the default leftmost-first match and both |
1322 | /// // patterns match at the same starting position, only the first pattern |
1323 | /// // will be returned in this case when doing a search for any of the |
1324 | /// // patterns. |
1325 | /// let expected = Some(Match::must(0, 0..6)); |
1326 | /// re.search_captures_with(&mut cache, &Input::new(haystack), &mut caps); |
1327 | /// assert_eq!(expected, caps.get_match()); |
1328 | /// |
1329 | /// // But if we want to check whether some other pattern matches, then we |
1330 | /// // can provide its pattern ID. |
1331 | /// let expected = Some(Match::must(1, 0..6)); |
1332 | /// let input = Input::new(haystack) |
1333 | /// .anchored(Anchored::Pattern(PatternID::must(1))); |
1334 | /// re.search_captures_with(&mut cache, &input, &mut caps); |
1335 | /// assert_eq!(expected, caps.get_match()); |
1336 | /// |
1337 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1338 | /// ``` |
1339 | /// |
1340 | /// # Example: specifying the bounds of a search |
1341 | /// |
1342 | /// This example shows how providing the bounds of a search can produce |
1343 | /// different results than simply sub-slicing the haystack. |
1344 | /// |
1345 | /// ``` |
1346 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
1347 | /// use regex_automata::{meta::Regex, Match, Input}; |
1348 | /// |
1349 | /// let re = Regex::new(r"\b[0-9]{3}\b" )?; |
1350 | /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); |
1351 | /// let haystack = "foo123bar" ; |
1352 | /// |
1353 | /// // Since we sub-slice the haystack, the search doesn't know about |
1354 | /// // the larger context and assumes that `123` is surrounded by word |
1355 | /// // boundaries. And of course, the match position is reported relative |
1356 | /// // to the sub-slice as well, which means we get `0..3` instead of |
1357 | /// // `3..6`. |
1358 | /// let expected = Some(Match::must(0, 0..3)); |
1359 | /// let input = Input::new(&haystack[3..6]); |
1360 | /// re.search_captures_with(&mut cache, &input, &mut caps); |
1361 | /// assert_eq!(expected, caps.get_match()); |
1362 | /// |
1363 | /// // But if we provide the bounds of the search within the context of the |
1364 | /// // entire haystack, then the search can take the surrounding context |
1365 | /// // into account. (And if we did find a match, it would be reported |
1366 | /// // as a valid offset into `haystack` instead of its sub-slice.) |
1367 | /// let expected = None; |
1368 | /// let input = Input::new(haystack).range(3..6); |
1369 | /// re.search_captures_with(&mut cache, &input, &mut caps); |
1370 | /// assert_eq!(expected, caps.get_match()); |
1371 | /// |
1372 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1373 | /// ``` |
1374 | #[inline ] |
1375 | pub fn search_captures_with( |
1376 | &self, |
1377 | cache: &mut Cache, |
1378 | input: &Input<'_>, |
1379 | caps: &mut Captures, |
1380 | ) { |
1381 | caps.set_pattern(None); |
1382 | let pid = self.search_slots_with(cache, input, caps.slots_mut()); |
1383 | caps.set_pattern(pid); |
1384 | } |
1385 | |
1386 | /// This is like [`Regex::search_slots`], but requires the caller to |
1387 | /// explicitly pass a [`Cache`]. |
1388 | /// |
1389 | /// # Why pass a `Cache` explicitly? |
1390 | /// |
1391 | /// Passing a `Cache` explicitly will bypass the use of an internal memory |
1392 | /// pool used by `Regex` to get a `Cache` for a search. The use of this |
1393 | /// pool can be slower in some cases when a `Regex` is used from multiple |
1394 | /// threads simultaneously. Typically, performance only becomes an issue |
1395 | /// when there is heavy contention, which in turn usually only occurs |
1396 | /// when each thread's primary unit of work is a regex search on a small |
1397 | /// haystack. |
1398 | /// |
1399 | /// # Example |
1400 | /// |
1401 | /// This example shows how to find the overall match offsets in a |
1402 | /// multi-pattern search without allocating a `Captures` value. Indeed, we |
1403 | /// can put our slots right on the stack. |
1404 | /// |
1405 | /// ``` |
1406 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
1407 | /// use regex_automata::{meta::Regex, PatternID, Input}; |
1408 | /// |
1409 | /// let re = Regex::new_many(&[ |
1410 | /// r"\pL+" , |
1411 | /// r"\d+" , |
1412 | /// ])?; |
1413 | /// let mut cache = re.create_cache(); |
1414 | /// let input = Input::new("!@#123" ); |
1415 | /// |
1416 | /// // We only care about the overall match offsets here, so we just |
1417 | /// // allocate two slots for each pattern. Each slot records the start |
1418 | /// // and end of the match. |
1419 | /// let mut slots = [None; 4]; |
1420 | /// let pid = re.search_slots_with(&mut cache, &input, &mut slots); |
1421 | /// assert_eq!(Some(PatternID::must(1)), pid); |
1422 | /// |
1423 | /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'. |
1424 | /// // See 'GroupInfo' for more details on the mapping between groups and |
1425 | /// // slot indices. |
1426 | /// let slot_start = pid.unwrap().as_usize() * 2; |
1427 | /// let slot_end = slot_start + 1; |
1428 | /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get())); |
1429 | /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get())); |
1430 | /// |
1431 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1432 | /// ``` |
1433 | #[inline ] |
1434 | pub fn search_slots_with( |
1435 | &self, |
1436 | cache: &mut Cache, |
1437 | input: &Input<'_>, |
1438 | slots: &mut [Option<NonMaxUsize>], |
1439 | ) -> Option<PatternID> { |
1440 | if self.imp.info.is_impossible(input) { |
1441 | return None; |
1442 | } |
1443 | self.imp.strat.search_slots(cache, input, slots) |
1444 | } |
1445 | |
1446 | /// This is like [`Regex::which_overlapping_matches`], but requires the |
1447 | /// caller to explicitly pass a [`Cache`]. |
1448 | /// |
    /// # Why pass a `Cache` explicitly?
    ///
    /// Passing a `Cache` explicitly will bypass the use of an internal memory
    /// pool used by `Regex` to get a `Cache` for a search. The use of this
    /// pool can be slower in some cases when a `Regex` is used from multiple
    /// threads simultaneously. Typically, performance only becomes an issue
    /// when there is heavy contention, which in turn usually only occurs
    /// when each thread's primary unit of work is a regex search on a small
    /// haystack.
    ///
1459 | /// # Example |
1460 | /// |
1461 | /// ``` |
1462 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
1463 | /// use regex_automata::{meta::Regex, Input, MatchKind, PatternSet}; |
1464 | /// |
1465 | /// let patterns = &[ |
1466 | /// r"\w+" , r"\d+" , r"\pL+" , r"foo" , r"bar" , r"barfoo" , r"foobar" , |
1467 | /// ]; |
1468 | /// let re = Regex::builder() |
1469 | /// .configure(Regex::config().match_kind(MatchKind::All)) |
1470 | /// .build_many(patterns)?; |
1471 | /// let mut cache = re.create_cache(); |
1472 | /// |
1473 | /// let input = Input::new("foobar" ); |
1474 | /// let mut patset = PatternSet::new(re.pattern_len()); |
1475 | /// re.which_overlapping_matches_with(&mut cache, &input, &mut patset); |
1476 | /// let expected = vec![0, 2, 3, 4, 6]; |
1477 | /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect(); |
1478 | /// assert_eq!(expected, got); |
1479 | /// |
1480 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1481 | /// ``` |
1482 | #[inline ] |
1483 | pub fn which_overlapping_matches_with( |
1484 | &self, |
1485 | cache: &mut Cache, |
1486 | input: &Input<'_>, |
1487 | patset: &mut PatternSet, |
1488 | ) { |
1489 | if self.imp.info.is_impossible(input) { |
1490 | return; |
1491 | } |
1492 | self.imp.strat.which_overlapping_matches(cache, input, patset) |
1493 | } |
1494 | } |
1495 | |
1496 | /// Various non-search routines for querying properties of a `Regex` and |
1497 | /// convenience routines for creating [`Captures`] and [`Cache`] values. |
1498 | impl Regex { |
1499 | /// Creates a new object for recording capture group offsets. This is used |
1500 | /// in search APIs like [`Regex::captures`] and [`Regex::search_captures`]. |
1501 | /// |
1502 | /// This is a convenience routine for |
1503 | /// `Captures::all(re.group_info().clone())`. Callers may build other types |
1504 | /// of `Captures` values that record less information (and thus require |
1505 | /// less work from the regex engine) using [`Captures::matches`] and |
1506 | /// [`Captures::empty`]. |
1507 | /// |
1508 | /// # Example |
1509 | /// |
1510 | /// This shows some alternatives to [`Regex::create_captures`]: |
1511 | /// |
1512 | /// ``` |
1513 | /// use regex_automata::{ |
1514 | /// meta::Regex, |
1515 | /// util::captures::Captures, |
1516 | /// Match, PatternID, Span, |
1517 | /// }; |
1518 | /// |
1519 | /// let re = Regex::new(r"(?<first>[A-Z][a-z]+) (?<last>[A-Z][a-z]+)" )?; |
1520 | /// |
1521 | /// // This is equivalent to Regex::create_captures. It stores matching |
1522 | /// // offsets for all groups in the regex. |
1523 | /// let mut all = Captures::all(re.group_info().clone()); |
1524 | /// re.captures("Bruce Springsteen" , &mut all); |
1525 | /// assert_eq!(Some(Match::must(0, 0..17)), all.get_match()); |
1526 | /// assert_eq!(Some(Span::from(0..5)), all.get_group_by_name("first" )); |
1527 | /// assert_eq!(Some(Span::from(6..17)), all.get_group_by_name("last" )); |
1528 | /// |
1529 | /// // In this version, we only care about the implicit groups, which |
1530 | /// // means offsets for the explicit groups will be unavailable. It can |
1531 | /// // sometimes be faster to ask for fewer groups, since the underlying |
1532 | /// // regex engine needs to do less work to keep track of them. |
1533 | /// let mut matches = Captures::matches(re.group_info().clone()); |
1534 | /// re.captures("Bruce Springsteen" , &mut matches); |
1535 | /// // We still get the overall match info. |
1536 | /// assert_eq!(Some(Match::must(0, 0..17)), matches.get_match()); |
1537 | /// // But now the explicit groups are unavailable. |
1538 | /// assert_eq!(None, matches.get_group_by_name("first" )); |
1539 | /// assert_eq!(None, matches.get_group_by_name("last" )); |
1540 | /// |
1541 | /// // Finally, in this version, we don't ask to keep track of offsets for |
1542 | /// // *any* groups. All we get back is whether a match occurred, and if |
1543 | /// // so, the ID of the pattern that matched. |
1544 | /// let mut empty = Captures::empty(re.group_info().clone()); |
1545 | /// re.captures("Bruce Springsteen" , &mut empty); |
1546 | /// // it's a match! |
1547 | /// assert!(empty.is_match()); |
1548 | /// // for pattern ID 0 |
1549 | /// assert_eq!(Some(PatternID::ZERO), empty.pattern()); |
1550 | /// // Match offsets are unavailable. |
1551 | /// assert_eq!(None, empty.get_match()); |
1552 | /// // And of course, explicit groups are unavailable too. |
1553 | /// assert_eq!(None, empty.get_group_by_name("first" )); |
1554 | /// assert_eq!(None, empty.get_group_by_name("last" )); |
1555 | /// |
1556 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1557 | /// ``` |
1558 | pub fn create_captures(&self) -> Captures { |
1559 | Captures::all(self.group_info().clone()) |
1560 | } |
1561 | |
1562 | /// Creates a new cache for use with lower level search APIs like |
1563 | /// [`Regex::search_with`]. |
1564 | /// |
1565 | /// The cache returned should only be used for searches for this `Regex`. |
1566 | /// If you want to reuse the cache for another `Regex`, then you must call |
1567 | /// [`Cache::reset`] with that `Regex`. |
1568 | /// |
1569 | /// This is a convenience routine for [`Cache::new`]. |
1570 | /// |
1571 | /// # Example |
1572 | /// |
1573 | /// ``` |
1574 | /// use regex_automata::{meta::Regex, Input, Match}; |
1575 | /// |
1576 | /// let re = Regex::new(r"(?-u)m\w+\s+m\w+" )?; |
1577 | /// let mut cache = re.create_cache(); |
1578 | /// let input = Input::new("crazy janey and her mission man" ); |
1579 | /// assert_eq!( |
1580 | /// Some(Match::must(0, 20..31)), |
1581 | /// re.search_with(&mut cache, &input), |
1582 | /// ); |
1583 | /// |
1584 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1585 | /// ``` |
1586 | pub fn create_cache(&self) -> Cache { |
1587 | self.imp.strat.create_cache() |
1588 | } |
1589 | |
1590 | /// Returns the total number of patterns in this regex. |
1591 | /// |
1592 | /// The standard [`Regex::new`] constructor always results in a `Regex` |
1593 | /// with a single pattern, but [`Regex::new_many`] permits building a |
1594 | /// multi-pattern regex. |
1595 | /// |
1596 | /// A `Regex` guarantees that the maximum possible `PatternID` returned in |
1597 | /// any match is `Regex::pattern_len() - 1`. In the case where the number |
1598 | /// of patterns is `0`, a match is impossible. |
1599 | /// |
1600 | /// # Example |
1601 | /// |
1602 | /// ``` |
1603 | /// use regex_automata::meta::Regex; |
1604 | /// |
1605 | /// let re = Regex::new(r"(?m)^[a-z]$" )?; |
1606 | /// assert_eq!(1, re.pattern_len()); |
1607 | /// |
1608 | /// let re = Regex::new_many::<&str>(&[])?; |
1609 | /// assert_eq!(0, re.pattern_len()); |
1610 | /// |
1611 | /// let re = Regex::new_many(&["a" , "b" , "c" ])?; |
1612 | /// assert_eq!(3, re.pattern_len()); |
1613 | /// |
1614 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1615 | /// ``` |
1616 | pub fn pattern_len(&self) -> usize { |
1617 | self.imp.info.pattern_len() |
1618 | } |
1619 | |
1620 | /// Returns the total number of capturing groups. |
1621 | /// |
1622 | /// This includes the implicit capturing group corresponding to the |
1623 | /// entire match. Therefore, the minimum value returned is `1`. |
1624 | /// |
1625 | /// # Example |
1626 | /// |
1627 | /// This shows a few patterns and how many capture groups they have. |
1628 | /// |
1629 | /// ``` |
1630 | /// use regex_automata::meta::Regex; |
1631 | /// |
1632 | /// let len = |pattern| { |
1633 | /// Regex::new(pattern).map(|re| re.captures_len()) |
1634 | /// }; |
1635 | /// |
1636 | /// assert_eq!(1, len("a" )?); |
1637 | /// assert_eq!(2, len("(a)" )?); |
1638 | /// assert_eq!(3, len("(a)|(b)" )?); |
1639 | /// assert_eq!(5, len("(a)(b)|(c)(d)" )?); |
1640 | /// assert_eq!(2, len("(a)|b" )?); |
1641 | /// assert_eq!(2, len("a|(b)" )?); |
1642 | /// assert_eq!(2, len("(b)*" )?); |
1643 | /// assert_eq!(2, len("(b)+" )?); |
1644 | /// |
1645 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1646 | /// ``` |
1647 | /// |
1648 | /// # Example: multiple patterns |
1649 | /// |
1650 | /// This routine also works for multiple patterns. The total number is |
1651 | /// the sum of the capture groups of each pattern. |
1652 | /// |
1653 | /// ``` |
1654 | /// use regex_automata::meta::Regex; |
1655 | /// |
1656 | /// let len = |patterns| { |
1657 | /// Regex::new_many(patterns).map(|re| re.captures_len()) |
1658 | /// }; |
1659 | /// |
1660 | /// assert_eq!(2, len(&["a" , "b" ])?); |
1661 | /// assert_eq!(4, len(&["(a)" , "(b)" ])?); |
1662 | /// assert_eq!(6, len(&["(a)|(b)" , "(c)|(d)" ])?); |
1663 | /// assert_eq!(8, len(&["(a)(b)|(c)(d)" , "(x)(y)" ])?); |
1664 | /// assert_eq!(3, len(&["(a)" , "b" ])?); |
1665 | /// assert_eq!(3, len(&["a" , "(b)" ])?); |
1666 | /// assert_eq!(4, len(&["(a)" , "(b)*" ])?); |
1667 | /// assert_eq!(4, len(&["(a)+" , "(b)+" ])?); |
1668 | /// |
1669 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1670 | /// ``` |
1671 | pub fn captures_len(&self) -> usize { |
1672 | self.imp |
1673 | .info |
1674 | .props_union() |
1675 | .explicit_captures_len() |
1676 | .saturating_add(self.pattern_len()) |
1677 | } |
1678 | |
1679 | /// Returns the total number of capturing groups that appear in every |
1680 | /// possible match. |
1681 | /// |
1682 | /// If the number of capture groups can vary depending on the match, then |
1683 | /// this returns `None`. That is, a value is only returned when the number |
1684 | /// of matching groups is invariant or "static." |
1685 | /// |
1686 | /// Note that like [`Regex::captures_len`], this **does** include the |
1687 | /// implicit capturing group corresponding to the entire match. Therefore, |
1688 | /// when a non-None value is returned, it is guaranteed to be at least `1`. |
1689 | /// Stated differently, a return value of `Some(0)` is impossible. |
1690 | /// |
1691 | /// # Example |
1692 | /// |
1693 | /// This shows a few cases where a static number of capture groups is |
1694 | /// available and a few cases where it is not. |
1695 | /// |
1696 | /// ``` |
1697 | /// use regex_automata::meta::Regex; |
1698 | /// |
1699 | /// let len = |pattern| { |
1700 | /// Regex::new(pattern).map(|re| re.static_captures_len()) |
1701 | /// }; |
1702 | /// |
1703 | /// assert_eq!(Some(1), len("a" )?); |
1704 | /// assert_eq!(Some(2), len("(a)" )?); |
1705 | /// assert_eq!(Some(2), len("(a)|(b)" )?); |
1706 | /// assert_eq!(Some(3), len("(a)(b)|(c)(d)" )?); |
1707 | /// assert_eq!(None, len("(a)|b" )?); |
1708 | /// assert_eq!(None, len("a|(b)" )?); |
1709 | /// assert_eq!(None, len("(b)*" )?); |
1710 | /// assert_eq!(Some(2), len("(b)+" )?); |
1711 | /// |
1712 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1713 | /// ``` |
1714 | /// |
1715 | /// # Example: multiple patterns |
1716 | /// |
1717 | /// This property extends to regexes with multiple patterns as well. In |
    /// order for there to be a static number of capture groups in this case,
1719 | /// every pattern must have the same static number. |
1720 | /// |
1721 | /// ``` |
1722 | /// use regex_automata::meta::Regex; |
1723 | /// |
1724 | /// let len = |patterns| { |
1725 | /// Regex::new_many(patterns).map(|re| re.static_captures_len()) |
1726 | /// }; |
1727 | /// |
1728 | /// assert_eq!(Some(1), len(&["a" , "b" ])?); |
1729 | /// assert_eq!(Some(2), len(&["(a)" , "(b)" ])?); |
1730 | /// assert_eq!(Some(2), len(&["(a)|(b)" , "(c)|(d)" ])?); |
1731 | /// assert_eq!(Some(3), len(&["(a)(b)|(c)(d)" , "(x)(y)" ])?); |
1732 | /// assert_eq!(None, len(&["(a)" , "b" ])?); |
1733 | /// assert_eq!(None, len(&["a" , "(b)" ])?); |
1734 | /// assert_eq!(None, len(&["(a)" , "(b)*" ])?); |
1735 | /// assert_eq!(Some(2), len(&["(a)+" , "(b)+" ])?); |
1736 | /// |
1737 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1738 | /// ``` |
1739 | #[inline ] |
1740 | pub fn static_captures_len(&self) -> Option<usize> { |
1741 | self.imp |
1742 | .info |
1743 | .props_union() |
1744 | .static_explicit_captures_len() |
1745 | .map(|len| len.saturating_add(1)) |
1746 | } |
1747 | |
1748 | /// Return information about the capture groups in this `Regex`. |
1749 | /// |
1750 | /// A `GroupInfo` is an immutable object that can be cheaply cloned. It |
1751 | /// is responsible for maintaining a mapping between the capture groups |
1752 | /// in the concrete syntax of zero or more regex patterns and their |
1753 | /// internal representation used by some of the regex matchers. It is also |
1754 | /// responsible for maintaining a mapping between the name of each group |
1755 | /// (if one exists) and its corresponding group index. |
1756 | /// |
1757 | /// A `GroupInfo` is ultimately what is used to build a [`Captures`] value, |
1758 | /// which is some mutable space where group offsets are stored as a result |
1759 | /// of a search. |
1760 | /// |
1761 | /// # Example |
1762 | /// |
1763 | /// This shows some alternatives to [`Regex::create_captures`]: |
1764 | /// |
1765 | /// ``` |
1766 | /// use regex_automata::{ |
1767 | /// meta::Regex, |
1768 | /// util::captures::Captures, |
1769 | /// Match, PatternID, Span, |
1770 | /// }; |
1771 | /// |
1772 | /// let re = Regex::new(r"(?<first>[A-Z][a-z]+) (?<last>[A-Z][a-z]+)" )?; |
1773 | /// |
1774 | /// // This is equivalent to Regex::create_captures. It stores matching |
1775 | /// // offsets for all groups in the regex. |
1776 | /// let mut all = Captures::all(re.group_info().clone()); |
1777 | /// re.captures("Bruce Springsteen" , &mut all); |
1778 | /// assert_eq!(Some(Match::must(0, 0..17)), all.get_match()); |
1779 | /// assert_eq!(Some(Span::from(0..5)), all.get_group_by_name("first" )); |
1780 | /// assert_eq!(Some(Span::from(6..17)), all.get_group_by_name("last" )); |
1781 | /// |
1782 | /// // In this version, we only care about the implicit groups, which |
1783 | /// // means offsets for the explicit groups will be unavailable. It can |
1784 | /// // sometimes be faster to ask for fewer groups, since the underlying |
1785 | /// // regex engine needs to do less work to keep track of them. |
1786 | /// let mut matches = Captures::matches(re.group_info().clone()); |
1787 | /// re.captures("Bruce Springsteen" , &mut matches); |
1788 | /// // We still get the overall match info. |
1789 | /// assert_eq!(Some(Match::must(0, 0..17)), matches.get_match()); |
1790 | /// // But now the explicit groups are unavailable. |
1791 | /// assert_eq!(None, matches.get_group_by_name("first" )); |
1792 | /// assert_eq!(None, matches.get_group_by_name("last" )); |
1793 | /// |
1794 | /// // Finally, in this version, we don't ask to keep track of offsets for |
1795 | /// // *any* groups. All we get back is whether a match occurred, and if |
1796 | /// // so, the ID of the pattern that matched. |
1797 | /// let mut empty = Captures::empty(re.group_info().clone()); |
1798 | /// re.captures("Bruce Springsteen" , &mut empty); |
1799 | /// // it's a match! |
1800 | /// assert!(empty.is_match()); |
1801 | /// // for pattern ID 0 |
1802 | /// assert_eq!(Some(PatternID::ZERO), empty.pattern()); |
1803 | /// // Match offsets are unavailable. |
1804 | /// assert_eq!(None, empty.get_match()); |
1805 | /// // And of course, explicit groups are unavailable too. |
1806 | /// assert_eq!(None, empty.get_group_by_name("first" )); |
1807 | /// assert_eq!(None, empty.get_group_by_name("last" )); |
1808 | /// |
1809 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1810 | /// ``` |
1811 | #[inline ] |
1812 | pub fn group_info(&self) -> &GroupInfo { |
1813 | self.imp.strat.group_info() |
1814 | } |
1815 | |
1816 | /// Returns the configuration object used to build this `Regex`. |
1817 | /// |
1818 | /// If no configuration object was explicitly passed, then the |
1819 | /// configuration returned represents the default. |
1820 | #[inline ] |
1821 | pub fn get_config(&self) -> &Config { |
1822 | self.imp.info.config() |
1823 | } |
1824 | |
1825 | /// Returns true if this regex has a high chance of being "accelerated." |
1826 | /// |
1827 | /// The precise meaning of "accelerated" is specifically left unspecified, |
    /// but the general meaning is that the search has a high likelihood of
    /// running faster than a character-at-a-time loop inside a standard
1830 | /// regex engine. |
1831 | /// |
1832 | /// When a regex is accelerated, it is only a *probabilistic* claim. That |
1833 | /// is, just because the regex is believed to be accelerated, that doesn't |
1834 | /// mean it will definitely execute searches very fast. Similarly, if a |
1835 | /// regex is *not* accelerated, that is also a probabilistic claim. That |
1836 | /// is, a regex for which `is_accelerated` returns `false` could still run |
1837 | /// searches more quickly than a regex for which `is_accelerated` returns |
1838 | /// `true`. |
1839 | /// |
1840 | /// Whether a regex is marked as accelerated or not is dependent on |
    /// implementation details that may change in a semver compatible release.
1842 | /// That is, a regex that is accelerated in a `x.y.1` release might not be |
1843 | /// accelerated in a `x.y.2` release. |
1844 | /// |
1845 | /// Basically, the value of acceleration boils down to a hedge: a hodge |
1846 | /// podge of internal heuristics combine to make a probabilistic guess |
1847 | /// that this regex search may run "fast." The value in knowing this from |
1848 | /// a caller's perspective is that it may act as a signal that no further |
1849 | /// work should be done to accelerate a search. For example, a grep-like |
1850 | /// tool might try to do some extra work extracting literals from a regex |
1851 | /// to create its own heuristic acceleration strategies. But it might |
1852 | /// choose to defer to this crate's acceleration strategy if one exists. |
1853 | /// This routine permits querying whether such a strategy is active for a |
1854 | /// particular regex. |
1855 | /// |
1856 | /// # Example |
1857 | /// |
1858 | /// ``` |
1859 | /// use regex_automata::meta::Regex; |
1860 | /// |
1861 | /// // A simple literal is very likely to be accelerated. |
1862 | /// let re = Regex::new(r"foo" )?; |
1863 | /// assert!(re.is_accelerated()); |
1864 | /// |
1865 | /// // A regex with no literals is likely to not be accelerated. |
1866 | /// let re = Regex::new(r"\w" )?; |
1867 | /// assert!(!re.is_accelerated()); |
1868 | /// |
1869 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
1870 | /// ``` |
1871 | #[inline ] |
1872 | pub fn is_accelerated(&self) -> bool { |
1873 | self.imp.strat.is_accelerated() |
1874 | } |
1875 | |
    /// Returns the total approximate heap memory, in bytes, used by this `Regex`.
1877 | /// |
1878 | /// Note that currently, there is no high level configuration for setting |
1879 | /// a limit on the specific value returned by this routine. Instead, the |
1880 | /// following routines can be used to control heap memory at a bit of a |
1881 | /// lower level: |
1882 | /// |
1883 | /// * [`Config::nfa_size_limit`] controls how big _any_ of the NFAs are |
1884 | /// allowed to be. |
1885 | /// * [`Config::onepass_size_limit`] controls how big the one-pass DFA is |
1886 | /// allowed to be. |
1887 | /// * [`Config::hybrid_cache_capacity`] controls how much memory the lazy |
1888 | /// DFA is permitted to allocate to store its transition table. |
1889 | /// * [`Config::dfa_size_limit`] controls how big a fully compiled DFA is |
1890 | /// allowed to be. |
1891 | /// * [`Config::dfa_state_limit`] controls the conditions under which the |
1892 | /// meta regex engine will even attempt to build a fully compiled DFA. |
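    ///
    /// # Example
    ///
    /// A minimal sketch of querying the reported heap usage. The exact
    /// number is an implementation detail, but a Unicode-aware pattern like
    /// the one below always needs some heap memory for its compiled form:
    ///
    /// ```
    /// use regex_automata::meta::Regex;
    ///
    /// let re = Regex::new(r"Bruce \w+")?;
    /// assert!(re.memory_usage() > 0);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```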
1893 | #[inline ] |
1894 | pub fn memory_usage(&self) -> usize { |
1895 | self.imp.strat.memory_usage() |
1896 | } |
1897 | } |
1898 | |
1899 | impl Clone for Regex { |
1900 | fn clone(&self) -> Regex { |
        let imp = Arc::clone(&self.imp);
        let pool = {
            let strat = Arc::clone(&imp.strat);
1904 | let create: CachePoolFn = Box::new(move || strat.create_cache()); |
1905 | Pool::new(create) |
1906 | }; |
1907 | Regex { imp, pool } |
1908 | } |
1909 | } |
1910 | |
1911 | #[derive (Clone, Debug)] |
1912 | pub(crate) struct RegexInfo(Arc<RegexInfoI>); |
1913 | |
1914 | #[derive (Clone, Debug)] |
1915 | struct RegexInfoI { |
1916 | config: Config, |
1917 | props: Vec<hir::Properties>, |
1918 | props_union: hir::Properties, |
1919 | } |
1920 | |
1921 | impl RegexInfo { |
1922 | fn new(config: Config, hirs: &[&Hir]) -> RegexInfo { |
1923 | // Collect all of the properties from each of the HIRs, and also |
1924 | // union them into one big set of properties representing all HIRs |
1925 | // as if they were in one big alternation. |
1926 | let mut props = vec![]; |
1927 | for hir in hirs.iter() { |
1928 | props.push(hir.properties().clone()); |
1929 | } |
1930 | let props_union = hir::Properties::union(&props); |
1931 | |
1932 | RegexInfo(Arc::new(RegexInfoI { config, props, props_union })) |
1933 | } |
1934 | |
1935 | pub(crate) fn config(&self) -> &Config { |
1936 | &self.0.config |
1937 | } |
1938 | |
1939 | pub(crate) fn props(&self) -> &[hir::Properties] { |
1940 | &self.0.props |
1941 | } |
1942 | |
1943 | pub(crate) fn props_union(&self) -> &hir::Properties { |
1944 | &self.0.props_union |
1945 | } |
1946 | |
1947 | pub(crate) fn pattern_len(&self) -> usize { |
1948 | self.props().len() |
1949 | } |
1950 | |
1951 | pub(crate) fn memory_usage(&self) -> usize { |
1952 | self.props().iter().map(|p| p.memory_usage()).sum::<usize>() |
1953 | + self.props_union().memory_usage() |
1954 | } |
1955 | |
1956 | /// Returns true when the search is guaranteed to be anchored. That is, |
1957 | /// when a match is reported, its offset is guaranteed to correspond to |
1958 | /// the start of the search. |
1959 | /// |
1960 | /// This includes returning true when `input` _isn't_ anchored but the |
1961 | /// underlying regex is. |
1962 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1963 | pub(crate) fn is_anchored_start(&self, input: &Input<'_>) -> bool { |
1964 | input.get_anchored().is_anchored() || self.is_always_anchored_start() |
1965 | } |
1966 | |
1967 | /// Returns true when this regex is always anchored to the start of a |
1968 | /// search. And in particular, that regardless of an `Input` configuration, |
1969 | /// if any match is reported it must start at `0`. |
1970 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1971 | pub(crate) fn is_always_anchored_start(&self) -> bool { |
1972 | use regex_syntax::hir::Look; |
1973 | self.props_union().look_set_prefix().contains(Look::Start) |
1974 | } |
1975 | |
1976 | /// Returns true when this regex is always anchored to the end of a |
1977 | /// search. And in particular, that regardless of an `Input` configuration, |
1978 | /// if any match is reported it must end at the end of the haystack. |
1979 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1980 | pub(crate) fn is_always_anchored_end(&self) -> bool { |
1981 | use regex_syntax::hir::Look; |
1982 | self.props_union().look_set_suffix().contains(Look::End) |
1983 | } |
1984 | |
1985 | /// Returns true if and only if it is known that a match is impossible |
1986 | /// for the given input. This is useful for short-circuiting and avoiding |
1987 | /// running the regex engine if it's known no match can be reported. |
1988 | /// |
1989 | /// Note that this doesn't necessarily detect every possible case. For |
1990 | /// example, when `pattern_len() == 0`, a match is impossible, but that |
1991 | /// case is so rare that it's fine to be handled by the regex engine |
1992 | /// itself. That is, it's not worth the cost of adding it here in order to |
1993 | /// make it a little faster. The reason is that this is called for every |
    /// search, so there is some cost to adding checks here. Arguably, some of
1995 | /// the checks that are here already probably shouldn't be here... |
1996 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1997 | fn is_impossible(&self, input: &Input<'_>) -> bool { |
1998 | // The underlying regex is anchored, so if we don't start the search |
1999 | // at position 0, a match is impossible, because the anchor can only |
2000 | // match at position 0. |
2001 | if input.start() > 0 && self.is_always_anchored_start() { |
2002 | return true; |
2003 | } |
2004 | // Same idea, but for the end anchor. |
2005 | if input.end() < input.haystack().len() |
2006 | && self.is_always_anchored_end() |
2007 | { |
2008 | return true; |
2009 | } |
2010 | // If the haystack is smaller than the minimum length required, then |
2011 | // we know there can be no match. |
2012 | let minlen = match self.props_union().minimum_len() { |
2013 | None => return false, |
2014 | Some(minlen) => minlen, |
2015 | }; |
2016 | if input.get_span().len() < minlen { |
2017 | return true; |
2018 | } |
2019 | // Same idea as minimum, but for maximum. This is trickier. We can |
2020 | // only apply the maximum when we know the entire span that we're |
2021 | // searching *has* to match according to the regex (and possibly the |
2022 | // input configuration). If we know there is too much for the regex |
2023 | // to match, we can bail early. |
2024 | // |
2025 | // I don't think we can apply the maximum otherwise unfortunately. |
2026 | if self.is_anchored_start(input) && self.is_always_anchored_end() { |
2027 | let maxlen = match self.props_union().maximum_len() { |
2028 | None => return false, |
2029 | Some(maxlen) => maxlen, |
2030 | }; |
2031 | if input.get_span().len() > maxlen { |
2032 | return true; |
2033 | } |
2034 | } |
2035 | false |
2036 | } |
2037 | } |
2038 | |
2039 | /// An iterator over all non-overlapping matches. |
2040 | /// |
2041 | /// The iterator yields a [`Match`] value until no more matches could be found. |
2042 | /// |
2043 | /// The lifetime parameters are as follows: |
2044 | /// |
2045 | /// * `'r` represents the lifetime of the `Regex` that produced this iterator. |
2046 | /// * `'h` represents the lifetime of the haystack being searched. |
2047 | /// |
2048 | /// This iterator can be created with the [`Regex::find_iter`] method. |
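///
/// # Example
///
/// A brief sketch of typical usage via [`Regex::find_iter`]:
///
/// ```
/// use regex_automata::{meta::Regex, Match};
///
/// let re = Regex::new(r"[0-9]+")?;
/// let matches: Vec<Match> = re.find_iter("a1b22c333").collect();
/// assert_eq!(matches, vec![
///     Match::must(0, 1..2),
///     Match::must(0, 3..5),
///     Match::must(0, 6..9),
/// ]);
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```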
2049 | #[derive (Debug)] |
2050 | pub struct FindMatches<'r, 'h> { |
2051 | re: &'r Regex, |
2052 | cache: CachePoolGuard<'r>, |
2053 | it: iter::Searcher<'h>, |
2054 | } |
2055 | |
2056 | impl<'r, 'h> FindMatches<'r, 'h> { |
2057 | /// Returns the `Regex` value that created this iterator. |
2058 | #[inline ] |
2059 | pub fn regex(&self) -> &'r Regex { |
2060 | self.re |
2061 | } |
2062 | |
2063 | /// Returns the current `Input` associated with this iterator. |
2064 | /// |
2065 | /// The `start` position on the given `Input` may change during iteration, |
2066 | /// but all other values are guaranteed to remain invariant. |
2067 | #[inline ] |
2068 | pub fn input<'s>(&'s self) -> &'s Input<'h> { |
2069 | self.it.input() |
2070 | } |
2071 | } |
2072 | |
2073 | impl<'r, 'h> Iterator for FindMatches<'r, 'h> { |
2074 | type Item = Match; |
2075 | |
2076 | #[inline ] |
2077 | fn next(&mut self) -> Option<Match> { |
        let FindMatches { re, ref mut cache, ref mut it } = *self;
        it.advance(|input| Ok(re.search_with(cache, input)))
2080 | } |
2081 | |
2082 | #[inline ] |
2083 | fn count(self) -> usize { |
2084 | // If all we care about is a count of matches, then we only need to |
2085 | // find the end position of each match. This can give us a 2x perf |
2086 | // boost in some cases, because it avoids needing to do a reverse scan |
2087 | // to find the start of a match. |
        let FindMatches { re, mut cache, it } = self;
        // This does the deref for PoolGuard once instead of every iter.
        let cache = &mut *cache;
        it.into_half_matches_iter(
            |input| Ok(re.search_half_with(cache, input)),
        )
        .count()
2095 | } |
2096 | } |
2097 | |
2098 | impl<'r, 'h> core::iter::FusedIterator for FindMatches<'r, 'h> {} |
2099 | |
2100 | /// An iterator over all non-overlapping leftmost matches with their capturing |
2101 | /// groups. |
2102 | /// |
2103 | /// The iterator yields a [`Captures`] value until no more matches could be |
2104 | /// found. |
2105 | /// |
2106 | /// The lifetime parameters are as follows: |
2107 | /// |
2108 | /// * `'r` represents the lifetime of the `Regex` that produced this iterator. |
2109 | /// * `'h` represents the lifetime of the haystack being searched. |
2110 | /// |
2111 | /// This iterator can be created with the [`Regex::captures_iter`] method. |
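///
/// # Example
///
/// A brief sketch of typical usage via [`Regex::captures_iter`]:
///
/// ```
/// use regex_automata::{meta::Regex, Span};
///
/// let re = Regex::new(r"(?<key>\w+)=(?<val>\w+)")?;
/// let mut it = re.captures_iter("a=1 b=2");
///
/// let caps = it.next().unwrap();
/// assert_eq!(Some(Span::from(0..1)), caps.get_group_by_name("key"));
/// assert_eq!(Some(Span::from(2..3)), caps.get_group_by_name("val"));
///
/// let caps = it.next().unwrap();
/// assert_eq!(Some(Span::from(4..5)), caps.get_group_by_name("key"));
/// assert_eq!(Some(Span::from(6..7)), caps.get_group_by_name("val"));
///
/// assert!(it.next().is_none());
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```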
2112 | #[derive (Debug)] |
2113 | pub struct CapturesMatches<'r, 'h> { |
2114 | re: &'r Regex, |
2115 | cache: CachePoolGuard<'r>, |
2116 | caps: Captures, |
2117 | it: iter::Searcher<'h>, |
2118 | } |
2119 | |
2120 | impl<'r, 'h> CapturesMatches<'r, 'h> { |
2121 | /// Returns the `Regex` value that created this iterator. |
2122 | #[inline ] |
2123 | pub fn regex(&self) -> &'r Regex { |
2124 | self.re |
2125 | } |
2126 | |
2127 | /// Returns the current `Input` associated with this iterator. |
2128 | /// |
2129 | /// The `start` position on the given `Input` may change during iteration, |
2130 | /// but all other values are guaranteed to remain invariant. |
2131 | #[inline ] |
2132 | pub fn input<'s>(&'s self) -> &'s Input<'h> { |
2133 | self.it.input() |
2134 | } |
2135 | } |
2136 | |
2137 | impl<'r, 'h> Iterator for CapturesMatches<'r, 'h> { |
2138 | type Item = Captures; |
2139 | |
2140 | #[inline ] |
2141 | fn next(&mut self) -> Option<Captures> { |
2142 | // Splitting 'self' apart seems necessary to appease borrowck. |
2143 | let CapturesMatches { re, ref mut cache, ref mut caps, ref mut it } = |
2144 | *self; |
2145 | let _ = it.advance(|input| { |
2146 | re.search_captures_with(cache, input, caps); |
2147 | Ok(caps.get_match()) |
2148 | }); |
2149 | if caps.is_match() { |
2150 | Some(caps.clone()) |
2151 | } else { |
2152 | None |
2153 | } |
2154 | } |
2155 | |
2156 | #[inline ] |
2157 | fn count(self) -> usize { |
2158 | let CapturesMatches { re, mut cache, it, .. } = self; |
2159 | // This does the deref for PoolGuard once instead of every iter. |
2160 | let cache = &mut *cache; |
2161 | it.into_half_matches_iter( |
2162 | |input| Ok(re.search_half_with(cache, input)), |
2163 | ) |
2164 | .count() |
2165 | } |
2166 | } |
2167 | |
2168 | impl<'r, 'h> core::iter::FusedIterator for CapturesMatches<'r, 'h> {} |
2169 | |
2170 | /// Yields all substrings delimited by a regular expression match. |
2171 | /// |
2172 | /// The spans correspond to the offsets between matches. |
2173 | /// |
2174 | /// The lifetime parameters are as follows: |
2175 | /// |
2176 | /// * `'r` represents the lifetime of the `Regex` that produced this iterator. |
2177 | /// * `'h` represents the lifetime of the haystack being searched. |
2178 | /// |
2179 | /// This iterator can be created with the [`Regex::split`] method. |
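///
/// # Example
///
/// A brief sketch of typical usage via [`Regex::split`]. Note that the
/// iterator yields [`Span`] values locating each substring, not the
/// substrings themselves:
///
/// ```
/// use regex_automata::{meta::Regex, Span};
///
/// let re = Regex::new(r",")?;
/// let spans: Vec<Span> = re.split("a,b,c").collect();
/// assert_eq!(spans, vec![
///     Span::from(0..1),
///     Span::from(2..3),
///     Span::from(4..5),
/// ]);
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```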
2180 | #[derive (Debug)] |
2181 | pub struct Split<'r, 'h> { |
2182 | finder: FindMatches<'r, 'h>, |
2183 | last: usize, |
2184 | } |
2185 | |
2186 | impl<'r, 'h> Split<'r, 'h> { |
2187 | /// Returns the current `Input` associated with this iterator. |
2188 | /// |
2189 | /// The `start` position on the given `Input` may change during iteration, |
2190 | /// but all other values are guaranteed to remain invariant. |
2191 | #[inline ] |
2192 | pub fn input<'s>(&'s self) -> &'s Input<'h> { |
2193 | self.finder.input() |
2194 | } |
2195 | } |
2196 | |
2197 | impl<'r, 'h> Iterator for Split<'r, 'h> { |
2198 | type Item = Span; |
2199 | |
2200 | fn next(&mut self) -> Option<Span> { |
2201 | match self.finder.next() { |
2202 | None => { |
                let len = self.finder.it.input().haystack().len();
                if self.last > len {
                    None
                } else {
                    let span = Span::from(self.last..len);
                    self.last = len + 1; // Next call will return None
                    Some(span)
                }
            }
            Some(m) => {
                let span = Span::from(self.last..m.start());
2214 | self.last = m.end(); |
2215 | Some(span) |
2216 | } |
2217 | } |
2218 | } |
2219 | } |
2220 | |
2221 | impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {} |
2222 | |
2223 | /// Yields at most `N` spans delimited by a regular expression match. |
2224 | /// |
2225 | /// The spans correspond to the offsets between matches. The last span will be |
2226 | /// whatever remains after splitting. |
2227 | /// |
2228 | /// The lifetime parameters are as follows: |
2229 | /// |
2230 | /// * `'r` represents the lifetime of the `Regex` that produced this iterator. |
2231 | /// * `'h` represents the lifetime of the haystack being searched. |
2232 | /// |
2233 | /// This iterator can be created with the [`Regex::splitn`] method. |
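///
/// # Example
///
/// A brief sketch of typical usage via [`Regex::splitn`] (assuming the
/// haystack-then-limit argument order). The final span covers everything
/// remaining once the limit is reached:
///
/// ```
/// use regex_automata::{meta::Regex, Span};
///
/// let re = Regex::new(r",")?;
/// let spans: Vec<Span> = re.splitn("a,b,c", 2).collect();
/// assert_eq!(spans, vec![
///     Span::from(0..1),
///     Span::from(2..5),
/// ]);
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```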
2234 | #[derive (Debug)] |
2235 | pub struct SplitN<'r, 'h> { |
2236 | splits: Split<'r, 'h>, |
2237 | limit: usize, |
2238 | } |
2239 | |
2240 | impl<'r, 'h> SplitN<'r, 'h> { |
2241 | /// Returns the current `Input` associated with this iterator. |
2242 | /// |
2243 | /// The `start` position on the given `Input` may change during iteration, |
2244 | /// but all other values are guaranteed to remain invariant. |
2245 | #[inline ] |
2246 | pub fn input<'s>(&'s self) -> &'s Input<'h> { |
2247 | self.splits.input() |
2248 | } |
2249 | } |
2250 | |
2251 | impl<'r, 'h> Iterator for SplitN<'r, 'h> { |
2252 | type Item = Span; |
2253 | |
2254 | fn next(&mut self) -> Option<Span> { |
2255 | if self.limit == 0 { |
2256 | return None; |
2257 | } |
2258 | |
2259 | self.limit -= 1; |
2260 | if self.limit > 0 { |
2261 | return self.splits.next(); |
2262 | } |
2263 | |
2264 | let len = self.splits.finder.it.input().haystack().len(); |
2265 | if self.splits.last > len { |
2266 | // We've already returned all substrings. |
2267 | None |
2268 | } else { |
            // self.limit == 0, so future calls will return None immediately
2270 | Some(Span::from(self.splits.last..len)) |
2271 | } |
2272 | } |
2273 | |
2274 | fn size_hint(&self) -> (usize, Option<usize>) { |
2275 | (0, Some(self.limit)) |
2276 | } |
2277 | } |
2278 | |
2279 | impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {} |
2280 | |
2281 | /// Represents mutable scratch space used by regex engines during a search. |
2282 | /// |
2283 | /// Most of the regex engines in this crate require some kind of |
2284 | /// mutable state in order to execute a search. This mutable state is |
/// explicitly separated from the core regex object (such as a
2286 | /// [`thompson::NFA`](crate::nfa::thompson::NFA)) so that the read-only regex |
2287 | /// object can be shared across multiple threads simultaneously without any |
2288 | /// synchronization. Conversely, a `Cache` must either be duplicated if using |
2289 | /// the same `Regex` from multiple threads, or else there must be some kind of |
2290 | /// synchronization that guarantees exclusive access while it's in use by one |
2291 | /// thread. |
2292 | /// |
2293 | /// A `Regex` attempts to do this synchronization for you by using a thread |
2294 | /// pool internally. Its size scales roughly with the number of simultaneous |
2295 | /// regex searches. |
2296 | /// |
2297 | /// For cases where one does not want to rely on a `Regex`'s internal thread |
2298 | /// pool, lower level routines such as [`Regex::search_with`] are provided |
2299 | /// that permit callers to pass a `Cache` into the search routine explicitly. |
2300 | /// |
2301 | /// General advice is that the thread pool is often more than good enough. |
2302 | /// However, it may be possible to observe the effects of its latency, |
2303 | /// especially when searching many small haystacks from many threads |
2304 | /// simultaneously. |
2305 | /// |
2306 | /// Caches can be created from their corresponding `Regex` via |
2307 | /// [`Regex::create_cache`]. A cache can only be used with either the `Regex` |
2308 | /// that created it, or the `Regex` that was most recently used to reset it |
2309 | /// with [`Cache::reset`]. Using a cache with any other `Regex` may result in |
2310 | /// panics or incorrect results. |
2311 | /// |
2312 | /// # Example |
2313 | /// |
2314 | /// ``` |
2315 | /// use regex_automata::{meta::Regex, Input, Match}; |
2316 | /// |
2317 | /// let re = Regex::new(r"(?-u)m\w+\s+m\w+" )?; |
2318 | /// let mut cache = re.create_cache(); |
2319 | /// let input = Input::new("crazy janey and her mission man" ); |
2320 | /// assert_eq!( |
2321 | /// Some(Match::must(0, 20..31)), |
2322 | /// re.search_with(&mut cache, &input), |
2323 | /// ); |
2324 | /// |
2325 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2326 | /// ``` |
2327 | #[derive (Debug, Clone)] |
2328 | pub struct Cache { |
2329 | pub(crate) capmatches: Captures, |
2330 | pub(crate) pikevm: wrappers::PikeVMCache, |
2331 | pub(crate) backtrack: wrappers::BoundedBacktrackerCache, |
2332 | pub(crate) onepass: wrappers::OnePassCache, |
2333 | pub(crate) hybrid: wrappers::HybridCache, |
2334 | pub(crate) revhybrid: wrappers::ReverseHybridCache, |
2335 | } |
2336 | |
2337 | impl Cache { |
2338 | /// Creates a new `Cache` for use with this regex. |
2339 | /// |
2340 | /// The cache returned should only be used for searches for the given |
2341 | /// `Regex`. If you want to reuse the cache for another `Regex`, then you |
2342 | /// must call [`Cache::reset`] with that `Regex`. |
2343 | pub fn new(re: &Regex) -> Cache { |
2344 | re.create_cache() |
2345 | } |
2346 | |
2347 | /// Reset this cache such that it can be used for searching with the given |
2348 | /// `Regex` (and only that `Regex`). |
2349 | /// |
2350 | /// A cache reset permits potentially reusing memory already allocated in |
2351 | /// this cache with a different `Regex`. |
2352 | /// |
2353 | /// # Example |
2354 | /// |
2355 | /// This shows how to re-purpose a cache for use with a different `Regex`. |
2356 | /// |
2357 | /// ``` |
2358 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2359 | /// use regex_automata::{meta::Regex, Match, Input}; |
2360 | /// |
2361 | /// let re1 = Regex::new(r"\w" )?; |
2362 | /// let re2 = Regex::new(r"\W" )?; |
2363 | /// |
2364 | /// let mut cache = re1.create_cache(); |
2365 | /// assert_eq!( |
2366 | /// Some(Match::must(0, 0..2)), |
2367 | /// re1.search_with(&mut cache, &Input::new("Δ" )), |
2368 | /// ); |
2369 | /// |
2370 | /// // Using 'cache' with re2 is not allowed. It may result in panics or |
2371 | /// // incorrect results. In order to re-purpose the cache, we must reset |
2372 | /// // it with the Regex we'd like to use it with. |
2373 | /// // |
2374 | /// // Similarly, after this reset, using the cache with 're1' is also not |
2375 | /// // allowed. |
2376 | /// cache.reset(&re2); |
2377 | /// assert_eq!( |
2378 | /// Some(Match::must(0, 0..3)), |
2379 | /// re2.search_with(&mut cache, &Input::new("☃" )), |
2380 | /// ); |
2381 | /// |
2382 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2383 | /// ``` |
2384 | pub fn reset(&mut self, re: &Regex) { |
2385 | re.imp.strat.reset_cache(self) |
2386 | } |
2387 | |
2388 | /// Returns the heap memory usage, in bytes, of this cache. |
2389 | /// |
2390 | /// This does **not** include the stack size used up by this cache. To |
2391 | /// compute that, use `std::mem::size_of::<Cache>()`. |
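    ///
    /// # Example
    ///
    /// A minimal sketch of combining this with [`Regex::memory_usage`] to
    /// get a rough picture of total heap use. The exact numbers are
    /// implementation details:
    ///
    /// ```
    /// use regex_automata::meta::Regex;
    ///
    /// let re = Regex::new(r"(?i)\w+")?;
    /// let cache = re.create_cache();
    /// let total = re.memory_usage() + cache.memory_usage();
    /// assert!(total > 0);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```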
2392 | pub fn memory_usage(&self) -> usize { |
2393 | let mut bytes = 0; |
2394 | bytes += self.pikevm.memory_usage(); |
2395 | bytes += self.backtrack.memory_usage(); |
2396 | bytes += self.onepass.memory_usage(); |
2397 | bytes += self.hybrid.memory_usage(); |
2398 | bytes += self.revhybrid.memory_usage(); |
2399 | bytes |
2400 | } |
2401 | } |
2402 | |
2403 | /// An object describing the configuration of a `Regex`. |
2404 | /// |
2405 | /// This configuration only includes options for the |
2406 | /// non-syntax behavior of a `Regex`, and can be applied via the |
2407 | /// [`Builder::configure`] method. For configuring the syntax options, see |
2408 | /// [`util::syntax::Config`](crate::util::syntax::Config). |
2409 | /// |
2410 | /// # Example: lower the NFA size limit |
2411 | /// |
2412 | /// In some cases, the default size limit might be too big. The size limit can |
2413 | /// be lowered, which will prevent large regex patterns from compiling. |
2414 | /// |
2415 | /// ``` |
2416 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2417 | /// use regex_automata::meta::Regex; |
2418 | /// |
2419 | /// let result = Regex::builder() |
2420 | /// .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10)))) |
2421 | /// // Not even 20KB is enough to build a single large Unicode class! |
2422 | /// .build(r"\pL" ); |
2423 | /// assert!(result.is_err()); |
2424 | /// |
2425 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2426 | /// ``` |
2427 | #[derive (Clone, Debug, Default)] |
2428 | pub struct Config { |
2429 | // As with other configuration types in this crate, we put all our knobs |
2430 | // in options so that we can distinguish between "default" and "not set." |
2431 | // This makes it possible to easily combine multiple configurations |
2432 | // without default values overwriting explicitly specified values. See the |
2433 | // 'overwrite' method. |
2434 | // |
2435 | // For docs on the fields below, see the corresponding method setters. |
2436 | match_kind: Option<MatchKind>, |
2437 | utf8_empty: Option<bool>, |
2438 | autopre: Option<bool>, |
2439 | pre: Option<Option<Prefilter>>, |
2440 | which_captures: Option<WhichCaptures>, |
2441 | nfa_size_limit: Option<Option<usize>>, |
2442 | onepass_size_limit: Option<Option<usize>>, |
2443 | hybrid_cache_capacity: Option<usize>, |
2444 | hybrid: Option<bool>, |
2445 | dfa: Option<bool>, |
2446 | dfa_size_limit: Option<Option<usize>>, |
2447 | dfa_state_limit: Option<Option<usize>>, |
2448 | onepass: Option<bool>, |
2449 | backtrack: Option<bool>, |
2450 | byte_classes: Option<bool>, |
2451 | line_terminator: Option<u8>, |
2452 | } |
2453 | |
2454 | impl Config { |
2455 | /// Create a new configuration object for a `Regex`. |
2456 | pub fn new() -> Config { |
2457 | Config::default() |
2458 | } |
2459 | |
2460 | /// Set the match semantics for a `Regex`. |
2461 | /// |
2462 | /// The default value is [`MatchKind::LeftmostFirst`]. |
2463 | /// |
2464 | /// # Example |
2465 | /// |
2466 | /// ``` |
2467 | /// use regex_automata::{meta::Regex, Match, MatchKind}; |
2468 | /// |
2469 | /// // By default, leftmost-first semantics are used, which |
2470 | /// // disambiguates matches at the same position by selecting |
2471 | /// // the one that corresponds earlier in the pattern. |
2472 | /// let re = Regex::new("sam|samwise" )?; |
2473 | /// assert_eq!(Some(Match::must(0, 0..3)), re.find("samwise" )); |
2474 | /// |
2475 | /// // But with 'all' semantics, match priority is ignored |
2476 | /// // and all match states are included. When coupled with |
2477 | /// // a leftmost search, the search will report the last |
2478 | /// // possible match. |
2479 | /// let re = Regex::builder() |
2480 | /// .configure(Regex::config().match_kind(MatchKind::All)) |
2481 | /// .build("sam|samwise" )?; |
2482 | /// assert_eq!(Some(Match::must(0, 0..7)), re.find("samwise" )); |
2483 | /// // Beware that this can lead to skipping matches! |
2484 | /// // Usually 'all' is used for anchored reverse searches |
2485 | /// // only, or for overlapping searches. |
2486 | /// assert_eq!(Some(Match::must(0, 4..11)), re.find("sam samwise" )); |
2487 | /// |
2488 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2489 | /// ``` |
2490 | pub fn match_kind(self, kind: MatchKind) -> Config { |
2491 | Config { match_kind: Some(kind), ..self } |
2492 | } |
2493 | |
2494 | /// Toggles whether empty matches are permitted to occur between the code |
2495 | /// units of a UTF-8 encoded codepoint. |
2496 | /// |
    /// This should generally be enabled when searching a `&str` or anything that
2498 | /// you otherwise know is valid UTF-8. It should be disabled in all other |
2499 | /// cases. Namely, if the haystack is not valid UTF-8 and this is enabled, |
2500 | /// then behavior is unspecified. |
2501 | /// |
2502 | /// By default, this is enabled. |
2503 | /// |
2504 | /// # Example |
2505 | /// |
2506 | /// ``` |
2507 | /// use regex_automata::{meta::Regex, Match}; |
2508 | /// |
2509 | /// let re = Regex::new("" )?; |
2510 | /// let got: Vec<Match> = re.find_iter("☃" ).collect(); |
2511 | /// // Matches only occur at the beginning and end of the snowman. |
2512 | /// assert_eq!(got, vec![ |
2513 | /// Match::must(0, 0..0), |
2514 | /// Match::must(0, 3..3), |
2515 | /// ]); |
2516 | /// |
2517 | /// let re = Regex::builder() |
2518 | /// .configure(Regex::config().utf8_empty(false)) |
2519 | /// .build("" )?; |
2520 | /// let got: Vec<Match> = re.find_iter("☃" ).collect(); |
2521 | /// // Matches now occur at every position! |
2522 | /// assert_eq!(got, vec![ |
2523 | /// Match::must(0, 0..0), |
2524 | /// Match::must(0, 1..1), |
2525 | /// Match::must(0, 2..2), |
2526 | /// Match::must(0, 3..3), |
2527 | /// ]); |
2528 | /// |
2529 | /// Ok::<(), Box<dyn std::error::Error>>(()) |
2530 | /// ``` |
2531 | pub fn utf8_empty(self, yes: bool) -> Config { |
2532 | Config { utf8_empty: Some(yes), ..self } |
2533 | } |
2534 | |
2535 | /// Toggles whether automatic prefilter support is enabled. |
2536 | /// |
2537 | /// If this is disabled and [`Config::prefilter`] is not set, then the |
2538 | /// meta regex engine will not use any prefilters. This can sometimes |
2539 | /// be beneficial in cases where you know (or have measured) that the |
2540 | /// prefilter leads to overall worse search performance. |
2541 | /// |
2542 | /// By default, this is enabled. |
2543 | /// |
2544 | /// # Example |
2545 | /// |
2546 | /// ``` |
2547 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2548 | /// use regex_automata::{meta::Regex, Match}; |
2549 | /// |
2550 | /// let re = Regex::builder() |
2551 | /// .configure(Regex::config().auto_prefilter(false)) |
2552 | /// .build(r"Bruce \w+" )?; |
2553 | /// let hay = "Hello Bruce Springsteen!" ; |
2554 | /// assert_eq!(Some(Match::must(0, 6..23)), re.find(hay)); |
2555 | /// |
2556 | /// Ok::<(), Box<dyn std::error::Error>>(()) |
2557 | /// ``` |
2558 | pub fn auto_prefilter(self, yes: bool) -> Config { |
2559 | Config { autopre: Some(yes), ..self } |
2560 | } |
2561 | |
2562 | /// Overrides and sets the prefilter to use inside a `Regex`. |
2563 | /// |
2564 | /// This permits one to forcefully set a prefilter in cases where the |
2565 | /// caller knows better than whatever the automatic prefilter logic is |
2566 | /// capable of. |
2567 | /// |
2568 | /// By default, this is set to `None` and an automatic prefilter will be |
2569 | /// used if one could be built. (Assuming [`Config::auto_prefilter`] is |
2570 | /// enabled, which it is by default.) |
2571 | /// |
2572 | /// # Example |
2573 | /// |
2574 | /// This example shows how to set your own prefilter. In the case of a |
2575 | /// pattern like `Bruce \w+`, the automatic prefilter is likely to be |
2576 | /// constructed in a way that it will look for occurrences of `Bruce `. |
2577 | /// In most cases, this is the best choice. But in some cases, it may be |
2578 | /// the case that running `memchr` on `B` is the best choice. One can |
2579 | /// achieve that behavior by overriding the automatic prefilter logic |
2580 | /// and providing a prefilter that just matches `B`. |
2581 | /// |
2582 | /// ``` |
2583 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2584 | /// use regex_automata::{ |
2585 | /// meta::Regex, |
2586 | /// util::prefilter::Prefilter, |
2587 | /// Match, MatchKind, |
2588 | /// }; |
2589 | /// |
2590 | /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["B" ]) |
2591 | /// .expect("a prefilter" ); |
2592 | /// let re = Regex::builder() |
2593 | /// .configure(Regex::config().prefilter(Some(pre))) |
2594 | /// .build(r"Bruce \w+" )?; |
2595 | /// let hay = "Hello Bruce Springsteen!" ; |
2596 | /// assert_eq!(Some(Match::must(0, 6..23)), re.find(hay)); |
2597 | /// |
2598 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2599 | /// ``` |
2600 | /// |
2601 | /// # Example: incorrect prefilters can lead to incorrect results! |
2602 | /// |
2603 | /// Be warned that setting an incorrect prefilter can lead to missed |
2604 | /// matches. So if you use this option, ensure your prefilter can _never_ |
2605 | /// report false negatives. (A false positive is, on the other hand, quite |
2606 | /// okay and generally unavoidable.) |
2607 | /// |
2608 | /// ``` |
2609 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2610 | /// use regex_automata::{ |
2611 | /// meta::Regex, |
2612 | /// util::prefilter::Prefilter, |
2613 | /// Match, MatchKind, |
2614 | /// }; |
2615 | /// |
2616 | /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Z" ]) |
2617 | /// .expect("a prefilter" ); |
2618 | /// let re = Regex::builder() |
2619 | /// .configure(Regex::config().prefilter(Some(pre))) |
2620 | /// .build(r"Bruce \w+" )?; |
2621 | /// let hay = "Hello Bruce Springsteen!" ; |
2622 | /// // Oops! No match found, but there should be one! |
2623 | /// assert_eq!(None, re.find(hay)); |
2624 | /// |
2625 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2626 | /// ``` |
2627 | pub fn prefilter(self, pre: Option<Prefilter>) -> Config { |
2628 | Config { pre: Some(pre), ..self } |
2629 | } |
2630 | |
2631 | /// Configures what kinds of groups are compiled as "capturing" in the |
2632 | /// underlying regex engine. |
2633 | /// |
2634 | /// This is set to [`WhichCaptures::All`] by default. Callers may wish to |
    /// use [`WhichCaptures::Implicit`] in cases where one wants to avoid the
2636 | /// overhead of capture states for explicit groups. |
2637 | /// |
2638 | /// Note that another approach to avoiding the overhead of capture groups |
2639 | /// is by using non-capturing groups in the regex pattern. That is, |
2640 | /// `(?:a)` instead of `(a)`. This option is useful when you can't control |
2641 | /// the concrete syntax but know that you don't need the underlying capture |
2642 | /// states. For example, using `WhichCaptures::Implicit` will behave as if |
2643 | /// all explicit capturing groups in the pattern were non-capturing. |
2644 | /// |
2645 | /// Setting this to `WhichCaptures::None` is usually not the right thing to |
2646 | /// do. When no capture states are compiled, some regex engines (such as |
2647 | /// the `PikeVM`) won't be able to report match offsets. This will manifest |
2648 | /// as no match being found. |
2649 | /// |
2650 | /// # Example |
2651 | /// |
2652 | /// This example demonstrates how the results of capture groups can change |
2653 | /// based on this option. First we show the default (all capture groups in |
2654 | /// the pattern are capturing): |
2655 | /// |
2656 | /// ``` |
2657 | /// use regex_automata::{meta::Regex, Match, Span}; |
2658 | /// |
2659 | /// let re = Regex::new(r"foo([0-9]+)bar")?; |
2660 | /// let hay = "foo123bar"; |
2661 | /// |
2662 | /// let mut caps = re.create_captures(); |
2663 | /// re.captures(hay, &mut caps); |
2664 | /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0)); |
2665 | /// assert_eq!(Some(Span::from(3..6)), caps.get_group(1)); |
2666 | /// |
2667 | /// Ok::<(), Box<dyn std::error::Error>>(()) |
2668 | /// ``` |
2669 | /// |
2670 | /// And now we show the behavior when we only include implicit capture |
2671 | /// groups. In this case, we can only find the overall match span, but the |
2672 | /// spans of any other explicit group don't exist because they are treated |
2673 | /// as non-capturing. (In effect, when `WhichCaptures::Implicit` is used, |
2674 | /// there is no real point in using [`Regex::captures`] since it will never |
2675 | /// be able to report more information than [`Regex::find`].) |
2676 | /// |
2677 | /// ``` |
2678 | /// use regex_automata::{ |
2679 | /// meta::Regex, |
2680 | /// nfa::thompson::WhichCaptures, |
2681 | /// Match, |
2682 | /// Span, |
2683 | /// }; |
2684 | /// |
2685 | /// let re = Regex::builder() |
2686 | /// .configure(Regex::config().which_captures(WhichCaptures::Implicit)) |
2687 | ///     .build(r"foo([0-9]+)bar")?; |
2688 | /// let hay = "foo123bar"; |
2689 | /// |
2690 | /// let mut caps = re.create_captures(); |
2691 | /// re.captures(hay, &mut caps); |
2692 | /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0)); |
2693 | /// assert_eq!(None, caps.get_group(1)); |
2694 | /// |
2695 | /// Ok::<(), Box<dyn std::error::Error>>(()) |
2696 | /// ``` |
2697 | pub fn which_captures(mut self, which_captures: WhichCaptures) -> Config { |
2698 | self.which_captures = Some(which_captures); |
2699 | self |
2700 | } |
2701 | |
2702 | /// Sets the size limit, in bytes, to enforce on the construction of every |
2703 | /// NFA built by the meta regex engine. |
2704 | /// |
2705 | /// Setting it to `None` disables the limit. This is not recommended if |
2706 | /// you're compiling untrusted patterns. |
2707 | /// |
2708 | /// Note that this limit is applied to _each_ NFA built, and if any of |
2709 | /// them exceed the limit, then construction will fail. This limit does |
2710 | /// _not_ correspond to the total memory used by all NFAs in the meta regex |
2711 | /// engine. |
2712 | /// |
2713 | /// This defaults to some reasonable number that permits most reasonable |
2714 | /// patterns. |
2715 | /// |
2716 | /// # Example |
2717 | /// |
2718 | /// ``` |
2719 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2720 | /// use regex_automata::meta::Regex; |
2721 | /// |
2722 | /// let result = Regex::builder() |
2723 | /// .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10)))) |
2724 | /// // Not even 20KB is enough to build a single large Unicode class! |
2725 | ///     .build(r"\pL"); |
2726 | /// assert!(result.is_err()); |
2727 | /// |
2728 | /// // But notice that building such a regex with the exact same limit |
2729 | /// // can succeed depending on other aspects of the configuration. For |
2730 | /// // example, a single *forward* NFA will (at time of writing) fit into |
2731 | /// // the 20KB limit, but a *reverse* NFA of the same pattern will not. |
2732 | /// // So if one configures a meta regex such that a reverse NFA is never |
2733 | /// // needed and thus never built, then the 20KB limit will be enough for |
2734 | /// // a pattern like \pL! |
2735 | /// let result = Regex::builder() |
2736 | /// .configure(Regex::config() |
2737 | /// .nfa_size_limit(Some(20 * (1<<10))) |
2738 | /// // The DFAs are the only thing that (currently) need a reverse |
2739 | /// // NFA. So if both are disabled, the meta regex engine will |
2740 | /// // skip building the reverse NFA. Note that this isn't an API |
2741 | /// // guarantee. A future semver compatible version may introduce |
2742 | /// // new use cases for a reverse NFA. |
2743 | /// .hybrid(false) |
2744 | /// .dfa(false) |
2745 | /// ) |
2746 | /// // Not even 20KB is enough to build a single large Unicode class! |
2747 | ///     .build(r"\pL"); |
2748 | /// assert!(result.is_ok()); |
2749 | /// |
2750 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2751 | /// ``` |
2752 | pub fn nfa_size_limit(self, limit: Option<usize>) -> Config { |
2753 | Config { nfa_size_limit: Some(limit), ..self } |
2754 | } |
2755 | |
2756 | /// Sets the size limit, in bytes, for the one-pass DFA. |
2757 | /// |
2758 | /// Setting it to `None` disables the limit. Disabling the limit is |
2759 | /// strongly discouraged when compiling untrusted patterns. Even if the |
2760 | /// patterns are trusted, it still may not be a good idea, since a one-pass |
2761 | /// DFA can use a lot of memory. With that said, as the size of a regex |
2762 | /// increases, the likelihood of it being one-pass decreases. |
2763 | /// |
2764 | /// This defaults to some reasonable number that permits most reasonable |
2765 | /// one-pass patterns. |
2766 | /// |
2767 | /// # Example |
2768 | /// |
2769 | /// This shows how to set the one-pass DFA size limit. Note that since |
2770 | /// a one-pass DFA is an optional component of the meta regex engine, |
2771 | /// this size limit only impacts what is built internally and will never |
2772 | /// determine whether a `Regex` itself fails to build. |
2773 | /// |
2774 | /// ``` |
2775 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2776 | /// use regex_automata::meta::Regex; |
2777 | /// |
2778 | /// let result = Regex::builder() |
2779 | /// .configure(Regex::config().onepass_size_limit(Some(2 * (1<<20)))) |
2780 | ///     .build(r"\pL{5}"); |
2781 | /// assert!(result.is_ok()); |
2782 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2783 | /// ``` |
2784 | pub fn onepass_size_limit(self, limit: Option<usize>) -> Config { |
2785 | Config { onepass_size_limit: Some(limit), ..self } |
2786 | } |
2787 | |
2788 | /// Set the cache capacity, in bytes, for the lazy DFA. |
2789 | /// |
2790 | /// The cache capacity of the lazy DFA determines approximately how much |
2791 | /// heap memory it is allowed to use to store its state transitions. The |
2792 | /// state transitions are computed at search time, and if the cache fills |
2793 | /// up, it is cleared. At this point, any previously generated state |
2794 | /// transitions are lost and are re-generated if they're needed again. |
2795 | /// |
2796 | /// This sort of cache filling and clearing works quite well _so long as |
2797 | /// cache clearing happens infrequently_. If it happens too often, then the |
2798 | /// meta regex engine will stop using the lazy DFA and switch over to a |
2799 | /// different regex engine. |
2800 | /// |
2801 | /// In cases where the cache is cleared too often, it may be possible to |
2802 | /// give the cache more space and reduce (or eliminate) how often it is |
2803 | /// cleared. Similarly, sometimes a regex is so big that the lazy DFA isn't |
2804 | /// used at all if its cache capacity isn't big enough. |
2805 | /// |
2806 | /// The capacity set here is a _limit_ on how much memory is used. The |
2807 | /// actual memory used is only allocated as it's needed. |
2808 | /// |
2809 | /// Determining the right value for this is a little tricky and will likely |
2810 | /// require some profiling. Enabling the `logging` feature and setting the |
2811 | /// log level to `trace` will also tell you how often the cache is being |
2812 | /// cleared. |
2813 | /// |
2814 | /// # Example |
2815 | /// |
2816 | /// ``` |
2817 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2818 | /// use regex_automata::meta::Regex; |
2819 | /// |
2820 | /// let result = Regex::builder() |
2821 | /// .configure(Regex::config().hybrid_cache_capacity(20 * (1<<20))) |
2822 | ///     .build(r"\pL{5}"); |
2823 | /// assert!(result.is_ok()); |
2824 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2825 | /// ``` |
2826 | pub fn hybrid_cache_capacity(self, limit: usize) -> Config { |
2827 | Config { hybrid_cache_capacity: Some(limit), ..self } |
2828 | } |
2829 | |
2830 | /// Sets the size limit, in bytes, for heap memory used for a fully |
2831 | /// compiled DFA. |
2832 | /// |
2833 | /// **NOTE:** If you increase this, you'll likely also need to increase |
2834 | /// [`Config::dfa_state_limit`]. |
2835 | /// |
2836 | /// In contrast to the lazy DFA, building a full DFA requires computing |
2837 | /// all of its state transitions up front. This can be a very expensive |
2838 | /// process, and runs in worst case `2^n` time and space (where `n` is |
2839 | /// proportional to the size of the regex). However, a full DFA unlocks |
2840 | /// some additional optimization opportunities. |
2841 | /// |
2842 | /// Because full DFAs can be so expensive, the default limits for them are |
2843 | /// incredibly small. Generally speaking, if your regex is moderately big |
2844 | /// or if you're using Unicode features (`\w` is Unicode-aware by default |
2845 | /// for example), then you can expect that the meta regex engine won't even |
2846 | /// attempt to build a DFA for it. |
2847 | /// |
2848 | /// If this and [`Config::dfa_state_limit`] are set to `None`, then the |
2849 | /// meta regex will not use any sort of limits when deciding whether to |
2850 | /// build a DFA. This in turn makes construction of a `Regex` take |
2851 | /// worst case exponential time and space. Even short patterns can result |
2852 | /// in huge space blow ups. So it is strongly recommended to keep some kind |
2853 | /// of limit set! |
2854 | /// |
2855 | /// The default is set to a small number that permits some simple regexes |
2856 | /// to get compiled into DFAs in reasonable time. |
2857 | /// |
2858 | /// # Example |
2859 | /// |
2860 | /// ``` |
2861 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2862 | /// use regex_automata::meta::Regex; |
2863 | /// |
2864 | /// let result = Regex::builder() |
2865 | /// // 100MB is much bigger than the default. |
2866 | /// .configure(Regex::config() |
2867 | /// .dfa_size_limit(Some(100 * (1<<20))) |
2868 | /// // We don't care about size too much here, so just |
2869 | /// // remove the NFA state limit altogether. |
2870 | /// .dfa_state_limit(None)) |
2871 | ///     .build(r"\pL{5}"); |
2872 | /// assert!(result.is_ok()); |
2873 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2874 | /// ``` |
2875 | pub fn dfa_size_limit(self, limit: Option<usize>) -> Config { |
2876 | Config { dfa_size_limit: Some(limit), ..self } |
2877 | } |
2878 | |
2879 | /// Sets a limit on the total number of NFA states, beyond which a full |
2880 | /// DFA will not be compiled. |
2881 | /// |
2882 | /// This limit works in concert with [`Config::dfa_size_limit`]. Namely, |
2883 | /// whereas `Config::dfa_size_limit` is applied by attempting to construct |
2884 | /// a DFA, this limit is used to avoid the attempt in the first place. This |
2885 | /// is useful to avoid hefty initialization costs associated with building |
2886 | /// a DFA for cases where it is obvious the DFA will ultimately be too big. |
2887 | /// |
2888 | /// By default, this is set to a very small number. |
2889 | /// |
2890 | /// # Example |
2891 | /// |
2892 | /// ``` |
2893 | /// # if cfg!(miri) { return Ok(()); } // miri takes too long |
2894 | /// use regex_automata::meta::Regex; |
2895 | /// |
2896 | /// let result = Regex::builder() |
2897 | /// .configure(Regex::config() |
2898 | /// // Sometimes the default state limit rejects DFAs even |
2899 | /// // if they would fit in the size limit. Here, we disable |
2900 | /// // the check on the number of NFA states and just rely on |
2901 | /// // the size limit. |
2902 | /// .dfa_state_limit(None)) |
2903 | ///     .build(r"(?-u)\w{30}"); |
2904 | /// assert!(result.is_ok()); |
2905 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2906 | /// ``` |
2907 | pub fn dfa_state_limit(self, limit: Option<usize>) -> Config { |
2908 | Config { dfa_state_limit: Some(limit), ..self } |
2909 | } |
2910 | |
2911 | /// Whether to attempt to shrink the size of the alphabet for the regex |
2912 | /// pattern or not. When enabled, the alphabet is shrunk into a set of |
2913 | /// equivalence classes, where every byte in the same equivalence class |
2914 | /// cannot discriminate between a match and a non-match. |
2915 | /// |
2916 | /// **WARNING:** This is only useful for debugging DFAs. Disabling this |
2917 | /// does not yield any speed advantages. Indeed, disabling it can result |
2918 | /// in much higher memory usage. Disabling byte classes is useful for |
2919 | /// debugging the actual generated transitions because it lets one see the |
2920 | /// transitions defined on actual bytes instead of the equivalence classes. |
2921 | /// |
2922 | /// This option is enabled by default and should never be disabled unless |
2923 | /// one is debugging the meta regex engine's internals. |
2924 | /// |
2925 | /// # Example |
2926 | /// |
2927 | /// ``` |
2928 | /// use regex_automata::{meta::Regex, Match}; |
2929 | /// |
2930 | /// let re = Regex::builder() |
2931 | /// .configure(Regex::config().byte_classes(false)) |
2932 | ///     .build(r"[a-z]+")?; |
2933 | /// let hay = "!!quux!!"; |
2934 | /// assert_eq!(Some(Match::must(0, 2..6)), re.find(hay)); |
2935 | /// |
2936 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2937 | /// ``` |
2938 | pub fn byte_classes(self, yes: bool) -> Config { |
2939 | Config { byte_classes: Some(yes), ..self } |
2940 | } |
2941 | |
2942 | /// Set the line terminator to be used by the `^` and `$` anchors in |
2943 | /// multi-line mode. |
2944 | /// |
2945 | /// This option has no effect when CRLF mode is enabled. That is, |
2946 | /// regardless of this setting, `(?Rm:^)` and `(?Rm:$)` will always treat |
2947 | /// `\r` and `\n` as line terminators (and will never match between a `\r` |
2948 | /// and a `\n`). |
2949 | /// |
2950 | /// By default, `\n` is the line terminator. |
2951 | /// |
2952 | /// **Warning**: This does not change the behavior of `.`. To do that, |
2953 | /// you'll need to configure the syntax option |
2954 | /// [`syntax::Config::line_terminator`](crate::util::syntax::Config::line_terminator) |
2955 | /// in addition to this. Otherwise, `.` will continue to match any |
2956 | /// character other than `\n`. |
2957 | /// |
2958 | /// # Example |
2959 | /// |
2960 | /// ``` |
2961 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
2962 | /// |
2963 | /// let re = Regex::builder() |
2964 | /// .syntax(syntax::Config::new().multi_line(true)) |
2965 | ///     .configure(Regex::config().line_terminator(b'\x00')) |
2966 | ///     .build(r"^foo$")?; |
2967 | /// let hay = "\x00foo\x00"; |
2968 | /// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay)); |
2969 | /// |
2970 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
2971 | /// ``` |
2972 | pub fn line_terminator(self, byte: u8) -> Config { |
2973 | Config { line_terminator: Some(byte), ..self } |
2974 | } |
2975 | |
2976 | /// Toggle whether the hybrid NFA/DFA (also known as the "lazy DFA") should |
2977 | /// be available for use by the meta regex engine. |
2978 | /// |
2979 | /// Enabling this does not necessarily mean that the lazy DFA will |
2980 | /// definitely be used. It just means that it will be _available_ for use |
2981 | /// if the meta regex engine thinks it will be useful. |
2982 | /// |
2983 | /// When the `hybrid` crate feature is enabled, then this is enabled by |
2984 | /// default. Otherwise, if the crate feature is disabled, then this is |
2985 | /// always disabled, regardless of its setting by the caller. |
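/// # Example
///
/// A minimal sketch showing that a regex still works as expected when the
/// lazy DFA is made unavailable; the meta regex engine simply falls back to
/// other engines. (The pattern and haystack here are arbitrary
/// illustrations.)
///
/// ```
/// use regex_automata::{meta::Regex, Match};
///
/// let re = Regex::builder()
///     .configure(Regex::config().hybrid(false))
///     .build(r"[0-9]{4}")?;
/// let hay = "year 2024!";
/// assert_eq!(Some(Match::must(0, 5..9)), re.find(hay));
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```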
2986 | pub fn hybrid(self, yes: bool) -> Config { |
2987 | Config { hybrid: Some(yes), ..self } |
2988 | } |
2989 | |
2990 | /// Toggle whether a fully compiled DFA should be available for use by the |
2991 | /// meta regex engine. |
2992 | /// |
2993 | /// Enabling this does not necessarily mean that a DFA will definitely be |
2994 | /// used. It just means that it will be _available_ for use if the meta |
2995 | /// regex engine thinks it will be useful. |
2996 | /// |
2997 | /// When the `dfa-build` crate feature is enabled, then this is enabled by |
2998 | /// default. Otherwise, if the crate feature is disabled, then this is |
2999 | /// always disabled, regardless of its setting by the caller. |
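/// # Example
///
/// A minimal sketch showing that match results are unchanged when the fully
/// compiled DFA is made unavailable. (The pattern and haystack here are
/// arbitrary illustrations.)
///
/// ```
/// use regex_automata::{meta::Regex, Match};
///
/// let re = Regex::builder()
///     .configure(Regex::config().dfa(false))
///     .build(r"foo\w+")?;
/// assert_eq!(Some(Match::must(0, 0..6)), re.find("foobar"));
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```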
3000 | pub fn dfa(self, yes: bool) -> Config { |
3001 | Config { dfa: Some(yes), ..self } |
3002 | } |
3003 | |
3004 | /// Toggle whether a one-pass DFA should be available for use by the meta |
3005 | /// regex engine. |
3006 | /// |
3007 | /// Enabling this does not necessarily mean that a one-pass DFA will |
3008 | /// definitely be used. It just means that it will be _available_ for |
3009 | /// use if the meta regex engine thinks it will be useful. (Indeed, a |
3010 | /// one-pass DFA can only be used when the regex is one-pass. See the |
3011 | /// [`dfa::onepass`](crate::dfa::onepass) module for more details.) |
3012 | /// |
3013 | /// When the `dfa-onepass` crate feature is enabled, then this is enabled |
3014 | /// by default. Otherwise, if the crate feature is disabled, then this is |
3015 | /// always disabled, regardless of its setting by the caller. |
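/// # Example
///
/// A minimal sketch showing that capture group spans are still reported when
/// the one-pass DFA is made unavailable; a different engine (such as the
/// `PikeVM`) resolves the capture groups instead. (The pattern and haystack
/// here are arbitrary illustrations.)
///
/// ```
/// use regex_automata::{meta::Regex, Span};
///
/// let re = Regex::builder()
///     .configure(Regex::config().onepass(false))
///     .build(r"foo([0-9]+)bar")?;
/// let mut caps = re.create_captures();
/// re.captures("foo123bar", &mut caps);
/// assert_eq!(Some(Span::from(3..6)), caps.get_group(1));
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```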
3016 | pub fn onepass(self, yes: bool) -> Config { |
3017 | Config { onepass: Some(yes), ..self } |
3018 | } |
3019 | |
3020 | /// Toggle whether a bounded backtracking regex engine should be available |
3021 | /// for use by the meta regex engine. |
3022 | /// |
3023 | /// Enabling this does not necessarily mean that a bounded backtracker will |
3024 | /// definitely be used. It just means that it will be _available_ for use |
3025 | /// if the meta regex engine thinks it will be useful. |
3026 | /// |
3027 | /// When the `nfa-backtrack` crate feature is enabled, then this is enabled |
3028 | /// by default. Otherwise, if the crate feature is disabled, then this is |
3029 | /// always disabled, regardless of its setting by the caller. |
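/// # Example
///
/// A minimal sketch showing that disabling the bounded backtracker does not
/// change the reported match. (The pattern and haystack here are arbitrary
/// illustrations.)
///
/// ```
/// use regex_automata::{meta::Regex, Match};
///
/// let re = Regex::builder()
///     .configure(Regex::config().backtrack(false))
///     .build(r"[a-z]+[0-9]")?;
/// assert_eq!(Some(Match::must(0, 0..4)), re.find("abc1"));
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```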
3030 | pub fn backtrack(self, yes: bool) -> Config { |
3031 | Config { backtrack: Some(yes), ..self } |
3032 | } |
3033 | |
3034 | /// Returns the match kind on this configuration, as set by |
3035 | /// [`Config::match_kind`]. |
3036 | /// |
3037 | /// If it was not explicitly set, then a default value is returned. |
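/// # Example
///
/// A small sketch of the default value when nothing has been configured:
///
/// ```
/// use regex_automata::{meta::Regex, MatchKind};
///
/// assert_eq!(MatchKind::LeftmostFirst, Regex::config().get_match_kind());
/// ```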
3038 | pub fn get_match_kind(&self) -> MatchKind { |
3039 | self.match_kind.unwrap_or(MatchKind::LeftmostFirst) |
3040 | } |
3041 | |
3042 | /// Returns whether empty matches must fall on valid UTF-8 boundaries, as |
3043 | /// set by [`Config::utf8_empty`]. |
3044 | /// |
3045 | /// If it was not explicitly set, then a default value is returned. |
3046 | pub fn get_utf8_empty(&self) -> bool { |
3047 | self.utf8_empty.unwrap_or(true) |
3048 | } |
3049 | |
3050 | /// Returns whether automatic prefilters are enabled, as set by |
3051 | /// [`Config::auto_prefilter`]. |
3052 | /// |
3053 | /// If it was not explicitly set, then a default value is returned. |
3054 | pub fn get_auto_prefilter(&self) -> bool { |
3055 | self.autopre.unwrap_or(true) |
3056 | } |
3057 | |
3058 | /// Returns a manually set prefilter, if one was set by |
3059 | /// [`Config::prefilter`]. |
3060 | /// |
3061 | /// If it was not explicitly set, then a default value is returned. |
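/// # Example
///
/// A small sketch showing that no prefilter is reported by default; this
/// only reflects a manually set prefilter, not the automatic one:
///
/// ```
/// use regex_automata::meta::Regex;
///
/// assert!(Regex::config().get_prefilter().is_none());
/// ```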
3062 | pub fn get_prefilter(&self) -> Option<&Prefilter> { |
3063 | self.pre.as_ref().unwrap_or(&None).as_ref() |
3064 | } |
3065 | |
3066 | /// Returns the capture configuration, as set by |
3067 | /// [`Config::which_captures`]. |
3068 | /// |
3069 | /// If it was not explicitly set, then a default value is returned. |
3070 | pub fn get_which_captures(&self) -> WhichCaptures { |
3071 | self.which_captures.unwrap_or(WhichCaptures::All) |
3072 | } |
3073 | |
3074 | /// Returns NFA size limit, as set by [`Config::nfa_size_limit`]. |
3075 | /// |
3076 | /// If it was not explicitly set, then a default value is returned. |
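/// # Example
///
/// A small sketch of the default limit (at time of writing, 10MB per NFA):
///
/// ```
/// use regex_automata::meta::Regex;
///
/// assert_eq!(Some(10 * (1 << 20)), Regex::config().get_nfa_size_limit());
/// ```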
3077 | pub fn get_nfa_size_limit(&self) -> Option<usize> { |
3078 | self.nfa_size_limit.unwrap_or(Some(10 * (1 << 20))) |
3079 | } |
3080 | |
3081 | /// Returns one-pass DFA size limit, as set by |
3082 | /// [`Config::onepass_size_limit`]. |
3083 | /// |
3084 | /// If it was not explicitly set, then a default value is returned. |
3085 | pub fn get_onepass_size_limit(&self) -> Option<usize> { |
3086 | self.onepass_size_limit.unwrap_or(Some(1 * (1 << 20))) |
3087 | } |
3088 | |
3089 | /// Returns hybrid NFA/DFA cache capacity, as set by |
3090 | /// [`Config::hybrid_cache_capacity`]. |
3091 | /// |
3092 | /// If it was not explicitly set, then a default value is returned. |
3093 | pub fn get_hybrid_cache_capacity(&self) -> usize { |
3094 | self.hybrid_cache_capacity.unwrap_or(2 * (1 << 20)) |
3095 | } |
3096 | |
3097 | /// Returns DFA size limit, as set by [`Config::dfa_size_limit`]. |
3098 | /// |
3099 | /// If it was not explicitly set, then a default value is returned. |
3100 | pub fn get_dfa_size_limit(&self) -> Option<usize> { |
3101 | // The default for this is VERY small because building a full DFA is |
3102 | // ridiculously costly. But for regexes that are very small, it can be |
3103 | // beneficial to use a full DFA. In particular, a full DFA can enable |
3104 | // additional optimizations via something called "accelerated" states. |
3105 | // Namely, when there's a state with only a few outgoing transitions, |
3106 | // we can temporarily suspend walking the transition table and use memchr |
3107 | // for just those outgoing transitions to skip ahead very quickly. |
3108 | // |
3109 | // Generally speaking, if Unicode is enabled in your regex and you're |
3110 | // using some kind of Unicode feature, then it's going to blow this |
3111 | // size limit. Moreover, Unicode tends to defeat the "accelerated" |
3112 | // state optimization too, so it's a double whammy. |
3113 | // |
3114 | // We also use a limit on the number of NFA states to avoid even |
3115 | // starting the DFA construction process. Namely, DFA construction |
3116 | // itself could make lots of initial allocs proportional to the size |
3117 | // of the NFA, and if the NFA is large, it doesn't make sense to pay |
3118 | // that cost if we know it's likely to be blown by a large margin. |
3119 | self.dfa_size_limit.unwrap_or(Some(40 * (1 << 10))) |
3120 | } |
3121 | |
3122 | /// Returns DFA size limit in terms of the number of states in the NFA, as |
3123 | /// set by [`Config::dfa_state_limit`]. |
3124 | /// |
3125 | /// If it was not explicitly set, then a default value is returned. |
3126 | pub fn get_dfa_state_limit(&self) -> Option<usize> { |
3127 | // Again, as with the size limit, we keep this very small. |
3128 | self.dfa_state_limit.unwrap_or(Some(30)) |
3129 | } |
3130 | |
3131 | /// Returns whether byte classes are enabled, as set by |
3132 | /// [`Config::byte_classes`]. |
3133 | /// |
3134 | /// If it was not explicitly set, then a default value is returned. |
3135 | pub fn get_byte_classes(&self) -> bool { |
3136 | self.byte_classes.unwrap_or(true) |
3137 | } |
3138 | |
3139 | /// Returns the line terminator for this configuration, as set by |
3140 | /// [`Config::line_terminator`]. |
3141 | /// |
3142 | /// If it was not explicitly set, then a default value is returned. |
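/// # Example
///
/// A small sketch of the default line terminator:
///
/// ```
/// use regex_automata::meta::Regex;
///
/// assert_eq!(b'\n', Regex::config().get_line_terminator());
/// ```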
3143 | pub fn get_line_terminator(&self) -> u8 { |
3144 | self.line_terminator.unwrap_or(b'\n') |
3145 | } |
3146 | |
3147 | /// Returns whether the hybrid NFA/DFA regex engine may be used, as set by |
3148 | /// [`Config::hybrid`]. |
3149 | /// |
3150 | /// If it was not explicitly set, then a default value is returned. |
3151 | pub fn get_hybrid(&self) -> bool { |
3152 | #[cfg(feature = "hybrid")] |
3153 | { |
3154 | self.hybrid.unwrap_or(true) |
3155 | } |
3156 | #[cfg(not(feature = "hybrid"))] |
3157 | { |
3158 | false |
3159 | } |
3160 | } |
3161 | |
3162 | /// Returns whether the DFA regex engine may be used, as set by |
3163 | /// [`Config::dfa`]. |
3164 | /// |
3165 | /// If it was not explicitly set, then a default value is returned. |
3166 | pub fn get_dfa(&self) -> bool { |
3167 | #[cfg(feature = "dfa-build")] |
3168 | { |
3169 | self.dfa.unwrap_or(true) |
3170 | } |
3171 | #[cfg(not(feature = "dfa-build"))] |
3172 | { |
3173 | false |
3174 | } |
3175 | } |
3176 | |
3177 | /// Returns whether the one-pass DFA regex engine may be used, as set by |
3178 | /// [`Config::onepass`]. |
3179 | /// |
3180 | /// If it was not explicitly set, then a default value is returned. |
3181 | pub fn get_onepass(&self) -> bool { |
3182 | #[cfg(feature = "dfa-onepass")] |
3183 | { |
3184 | self.onepass.unwrap_or(true) |
3185 | } |
3186 | #[cfg(not(feature = "dfa-onepass"))] |
3187 | { |
3188 | false |
3189 | } |
3190 | } |
3191 | |
3192 | /// Returns whether the bounded backtracking regex engine may be used, as |
3193 | /// set by [`Config::backtrack`]. |
3194 | /// |
3195 | /// If it was not explicitly set, then a default value is returned. |
3196 | pub fn get_backtrack(&self) -> bool { |
3197 | #[cfg(feature = "nfa-backtrack")] |
3198 | { |
3199 | self.backtrack.unwrap_or(true) |
3200 | } |
3201 | #[cfg(not(feature = "nfa-backtrack"))] |
3202 | { |
3203 | false |
3204 | } |
3205 | } |
3206 | |
3207 | /// Overwrite the default configuration such that the options in `o` are |
3208 | /// always used. If an option in `o` is not set, then the corresponding |
3209 | /// option in `self` is used. If it's not set in `self` either, then it |
3210 | /// remains not set. |
3211 | pub(crate) fn overwrite(&self, o: Config) -> Config { |
3212 | Config { |
3213 | match_kind: o.match_kind.or(self.match_kind), |
3214 | utf8_empty: o.utf8_empty.or(self.utf8_empty), |
3215 | autopre: o.autopre.or(self.autopre), |
3216 | pre: o.pre.or_else(|| self.pre.clone()), |
3217 | which_captures: o.which_captures.or(self.which_captures), |
3218 | nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit), |
3219 | onepass_size_limit: o |
3220 | .onepass_size_limit |
3221 | .or(self.onepass_size_limit), |
3222 | hybrid_cache_capacity: o |
3223 | .hybrid_cache_capacity |
3224 | .or(self.hybrid_cache_capacity), |
3225 | hybrid: o.hybrid.or(self.hybrid), |
3226 | dfa: o.dfa.or(self.dfa), |
3227 | dfa_size_limit: o.dfa_size_limit.or(self.dfa_size_limit), |
3228 | dfa_state_limit: o.dfa_state_limit.or(self.dfa_state_limit), |
3229 | onepass: o.onepass.or(self.onepass), |
3230 | backtrack: o.backtrack.or(self.backtrack), |
3231 | byte_classes: o.byte_classes.or(self.byte_classes), |
3232 | line_terminator: o.line_terminator.or(self.line_terminator), |
3233 | } |
3234 | } |
3235 | } |
3236 | |
3237 | /// A builder for configuring and constructing a `Regex`. |
3238 | /// |
3239 | /// The builder permits configuring two different aspects of a `Regex`: |
3240 | /// |
3241 | /// * [`Builder::configure`] will set high-level configuration options as |
3242 | /// described by a [`Config`]. |
3243 | /// * [`Builder::syntax`] will set the syntax level configuration options |
3244 | /// as described by a [`util::syntax::Config`](crate::util::syntax::Config). |
3245 | /// This only applies when building a `Regex` from pattern strings. |
3246 | /// |
3247 | /// Once configured, the builder can then be used to construct a `Regex` from |
3248 | /// one of 4 different inputs: |
3249 | /// |
3250 | /// * [`Builder::build`] creates a regex from a single pattern string. |
3251 | /// * [`Builder::build_many`] creates a regex from many pattern strings. |
3252 | /// * [`Builder::build_from_hir`] creates a regex from a |
3253 | /// [`regex-syntax::Hir`](Hir) expression. |
3254 | /// * [`Builder::build_many_from_hir`] creates a regex from many |
3255 | /// [`regex-syntax::Hir`](Hir) expressions. |
3256 | /// |
3257 | /// The latter two methods in particular provide a way to construct a fully |
3258 | /// featured regular expression matcher directly from an `Hir` expression |
3259 | /// without having to first convert it to a string. (This is in contrast to the |
3260 | /// top-level `regex` crate which intentionally provides no such API in order |
3261 | /// to avoid making `regex-syntax` a public dependency.) |
3262 | /// |
3263 | /// As a convenience, this builder may be created via [`Regex::builder`], which |
3264 | /// may help avoid an extra import. |
3265 | /// |
3266 | /// # Example: change the line terminator |
3267 | /// |
3268 | /// This example shows how to enable multi-line mode by default and change the |
3269 | /// line terminator to the NUL byte: |
3270 | /// |
3271 | /// ``` |
3272 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
3273 | /// |
3274 | /// let re = Regex::builder() |
3275 | /// .syntax(syntax::Config::new().multi_line(true)) |
3276 | ///     .configure(Regex::config().line_terminator(b'\x00')) |
3277 | ///     .build(r"^foo$")?; |
3278 | /// let hay = "\x00foo\x00"; |
3279 | /// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay)); |
3280 | /// |
3281 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
3282 | /// ``` |
3283 | /// |
3284 | /// # Example: disable UTF-8 requirement |
3285 | /// |
3286 | /// By default, regex patterns are required to match UTF-8. This includes |
3287 | /// regex patterns that can produce matches of length zero. In the case of an |
3288 | /// empty match, by default, matches will not appear between the code units of |
3289 | /// a UTF-8 encoded codepoint. |
3290 | /// |
3291 | /// However, it can be useful to disable this requirement, particularly if |
3292 | /// you're searching things like `&[u8]` that are not known to be valid UTF-8. |
3293 | /// |
3294 | /// ``` |
3295 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
3296 | /// |
3297 | /// let mut builder = Regex::builder(); |
3298 | /// // Disables the requirement that non-empty matches match UTF-8. |
3299 | /// builder.syntax(syntax::Config::new().utf8(false)); |
3300 | /// // Disables the requirement that empty matches match UTF-8 boundaries. |
3301 | /// builder.configure(Regex::config().utf8_empty(false)); |
3302 | /// |
3303 | /// // We can match raw bytes via \xZZ syntax, but we need to disable |
3304 | /// // Unicode mode to do that. We could disable it everywhere, or just |
3305 | /// // selectively, as shown here. |
3306 | /// let re = builder.build(r"(?-u:\xFF)foo(?-u:\xFF)")?; |
3307 | /// let hay = b"\xFFfoo\xFF"; |
3308 | /// assert_eq!(Some(Match::must(0, 0..5)), re.find(hay)); |
3309 | /// |
3310 | /// // We can also match between code units. |
3311 | /// let re = builder.build(r"")?; |
3312 | /// let hay = "☃"; |
3313 | /// assert_eq!(re.find_iter(hay).collect::<Vec<Match>>(), vec![ |
3314 | /// Match::must(0, 0..0), |
3315 | /// Match::must(0, 1..1), |
3316 | /// Match::must(0, 2..2), |
3317 | /// Match::must(0, 3..3), |
3318 | /// ]); |
3319 | /// |
3320 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
3321 | /// ``` |
3322 | #[derive(Clone, Debug)] |
3323 | pub struct Builder { |
3324 | config: Config, |
3325 | ast: ast::parse::ParserBuilder, |
3326 | hir: hir::translate::TranslatorBuilder, |
3327 | } |
3328 | |
3329 | impl Builder { |
3330 | /// Creates a new builder for configuring and constructing a [`Regex`]. |
3331 | pub fn new() -> Builder { |
3332 | Builder { |
3333 | config: Config::default(), |
3334 | ast: ast::parse::ParserBuilder::new(), |
3335 | hir: hir::translate::TranslatorBuilder::new(), |
3336 | } |
3337 | } |
3338 | |
3339 | /// Builds a `Regex` from a single pattern string. |
3340 | /// |
3341 | /// If there was a problem parsing the pattern or a problem turning it into |
3342 | /// a regex matcher, then an error is returned. |
3343 | /// |
3344 | /// # Example |
3345 | /// |
3346 | /// This example shows how to configure syntax options. |
3347 | /// |
3348 | /// ``` |
3349 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
3350 | /// |
3351 | /// let re = Regex::builder() |
3352 | /// .syntax(syntax::Config::new().crlf(true).multi_line(true)) |
3353 | ///     .build(r"^foo$")?; |
3354 | /// let hay = "\r\nfoo\r\n"; |
3355 | /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay)); |
3356 | /// |
3357 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
3358 | /// ``` |
3359 | pub fn build(&self, pattern: &str) -> Result<Regex, BuildError> { |
3360 | self.build_many(&[pattern]) |
3361 | } |
3362 | |
3363 | /// Builds a `Regex` from many pattern strings. |
3364 | /// |
3365 | /// If there was a problem parsing any of the patterns or a problem turning |
3366 | /// them into a regex matcher, then an error is returned. |
3367 | /// |
3368 | /// # Example: finding the pattern that caused an error |
3369 | /// |
3370 | /// When a syntax error occurs, it is possible to ask which pattern |
3371 | /// caused the syntax error. |
3372 | /// |
3373 | /// ``` |
3374 | /// use regex_automata::{meta::Regex, PatternID}; |
3375 | /// |
3376 | /// let err = Regex::builder() |
3377 | ///     .build_many(&["a", "b", r"\p{Foo}", "c"]) |
3378 | /// .unwrap_err(); |
3379 | /// assert_eq!(Some(PatternID::must(2)), err.pattern()); |
3380 | /// ``` |
3381 | /// |
3382 | /// # Example: zero patterns is valid |
3383 | /// |
3384 | /// Building a regex with zero patterns results in a regex that never |
3385 | /// matches anything. Because this routine is generic, passing an empty |
3386 | /// slice usually requires a turbo-fish (or something else to help type |
3387 | /// inference). |
3388 | /// |
3389 | /// ``` |
3390 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
3391 | /// |
3392 | /// let re = Regex::builder() |
3393 | /// .build_many::<&str>(&[])?; |
3394 | /// assert_eq!(None, re.find("")); |
3395 | /// |
3396 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
3397 | /// ``` |
3398 | pub fn build_many<P: AsRef<str>>( |
3399 | &self, |
3400 | patterns: &[P], |
3401 | ) -> Result<Regex, BuildError> { |
3402 | use crate::util::primitives::IteratorIndexExt; |
3403 | log! { |
3404 | debug!("building meta regex with {} patterns:", patterns.len()); |
3405 | for (pid, p) in patterns.iter().with_pattern_ids() { |
3406 | let p = p.as_ref(); |
3407 | // We might split a grapheme with this truncation logic, but |
3408 | // that's fine. We at least avoid splitting a codepoint. |
3409 | let maxoff = p |
3410 | .char_indices() |
3411 | .map(|(i, ch)| i + ch.len_utf8()) |
3412 | .take(1000) |
3413 | .last() |
3414 | .unwrap_or(0); |
3415 | if maxoff < p.len() { |
3416 | debug!("{:?}: {}[... snip ...]", pid, &p[..maxoff]); |
3417 | } else { |
3418 | debug!("{:?}: {}", pid, p); |
3419 | } |
3420 | } |
3421 | } |
3422 | let (mut asts, mut hirs) = (vec![], vec![]); |
3423 | for (pid, p) in patterns.iter().with_pattern_ids() { |
3424 | let ast = self |
3425 | .ast |
3426 | .build() |
3427 | .parse(p.as_ref()) |
3428 | .map_err(|err| BuildError::ast(pid, err))?; |
3429 | asts.push(ast); |
3430 | } |
3431 | for ((pid, p), ast) in |
3432 | patterns.iter().with_pattern_ids().zip(asts.iter()) |
3433 | { |
3434 | let hir = self |
3435 | .hir |
3436 | .build() |
3437 | .translate(p.as_ref(), ast) |
3438 | .map_err(|err| BuildError::hir(pid, err))?; |
3439 | hirs.push(hir); |
3440 | } |
3441 | self.build_many_from_hir(&hirs) |
3442 | } |
3443 | |
3444 | /// Builds a `Regex` directly from an `Hir` expression. |
3445 | /// |
3446 | /// This is useful if you needed to parse a pattern string into an `Hir` |
3447 | /// for other reasons (such as analysis or transformations). This routine |
3448 | /// permits building a `Regex` directly from the `Hir` expression instead |
3449 | /// of first converting the `Hir` back to a pattern string. |
3450 | /// |
3451 | /// When using this method, any options set via [`Builder::syntax`] are |
3452 | /// ignored. Namely, the syntax options only apply when parsing a pattern |
3453 | /// string, which isn't relevant here. |
3454 | /// |
3455 | /// If there was a problem building the underlying regex matcher for the |
3456 | /// given `Hir`, then an error is returned. |
3457 | /// |
3458 | /// # Example |
3459 | /// |
3460 | /// This example shows how one can hand-construct an `Hir` expression and |
3461 | /// build a regex from it without doing any parsing at all. |
3462 | /// |
3463 | /// ``` |
3464 | /// use { |
3465 | /// regex_automata::{meta::Regex, Match}, |
3466 | /// regex_syntax::hir::{Hir, Look}, |
3467 | /// }; |
3468 | /// |
3469 | /// // (?Rm)^foo$ |
3470 | /// let hir = Hir::concat(vec![ |
3471 | /// Hir::look(Look::StartCRLF), |
3472 | ///     Hir::literal("foo".as_bytes()), |
3473 | /// Hir::look(Look::EndCRLF), |
3474 | /// ]); |
3475 | /// let re = Regex::builder() |
3476 | /// .build_from_hir(&hir)?; |
3477 | /// let hay = "\r\nfoo\r\n"; |
3478 | /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay)); |
3479 | /// |
3480 | /// Ok::<(), Box<dyn std::error::Error>>(()) |
3481 | /// ``` |
3482 | pub fn build_from_hir(&self, hir: &Hir) -> Result<Regex, BuildError> { |
3483 | self.build_many_from_hir(&[hir]) |
3484 | } |
3485 | |
3486 | /// Builds a `Regex` directly from many `Hir` expressions. |
3487 | /// |
3488 | /// This is useful if you needed to parse pattern strings into `Hir` |
3489 | /// expressions for other reasons (such as analysis or transformations). |
3490 | /// This routine permits building a `Regex` directly from the `Hir` |
3491 | /// expressions instead of first converting the `Hir` expressions back to |
3492 | /// pattern strings. |
3493 | /// |
3494 | /// When using this method, any options set via [`Builder::syntax`] are |
3495 | /// ignored. Namely, the syntax options only apply when parsing a pattern |
3496 | /// string, which isn't relevant here. |
3497 | /// |
3498 | /// If there was a problem building the underlying regex matcher for the |
3499 | /// given `Hir` expressions, then an error is returned. |
3500 | /// |
3501 | /// Note that unlike [`Builder::build_many`], this can only fail as a |
3502 | /// result of building the underlying matcher. In that case, there is |
3503 | /// no single `Hir` expression that can be isolated as a reason for the |
3504 | /// failure. So if this routine fails, it's not possible to determine which |
3505 | /// `Hir` expression caused the failure. |
3506 | /// |
3507 | /// # Example |
3508 | /// |
3509 | /// This example shows how one can hand-construct multiple `Hir` |
3510 | /// expressions and build a single regex from them without doing any |
3511 | /// parsing at all. |
3512 | /// |
3513 | /// ``` |
3514 | /// use { |
3515 | /// regex_automata::{meta::Regex, Match}, |
3516 | /// regex_syntax::hir::{Hir, Look}, |
3517 | /// }; |
3518 | /// |
3519 | /// // (?Rm)^foo$ |
3520 | /// let hir1 = Hir::concat(vec![ |
3521 | /// Hir::look(Look::StartCRLF), |
3522 | ///     Hir::literal("foo".as_bytes()), |
3523 | /// Hir::look(Look::EndCRLF), |
3524 | /// ]); |
3525 | /// // (?Rm)^bar$ |
3526 | /// let hir2 = Hir::concat(vec![ |
3527 | /// Hir::look(Look::StartCRLF), |
3528 | ///     Hir::literal("bar".as_bytes()), |
3529 | /// Hir::look(Look::EndCRLF), |
3530 | /// ]); |
3531 | /// let re = Regex::builder() |
3532 | /// .build_many_from_hir(&[&hir1, &hir2])?; |
3533 | /// let hay = "\r\nfoo\r\nbar"; |
3534 | /// let got: Vec<Match> = re.find_iter(hay).collect(); |
3535 | /// let expected = vec![ |
3536 | /// Match::must(0, 2..5), |
3537 | /// Match::must(1, 7..10), |
3538 | /// ]; |
3539 | /// assert_eq!(expected, got); |
3540 | /// |
3541 | /// Ok::<(), Box<dyn std::error::Error>>(()) |
3542 | /// ``` |
3543 | pub fn build_many_from_hir<H: Borrow<Hir>>( |
3544 | &self, |
3545 | hirs: &[H], |
3546 | ) -> Result<Regex, BuildError> { |
3547 | let config = self.config.clone(); |
3548 | // We collect the HIRs into a vec so we can write internal routines |
3549 | // with '&[&Hir]'. That is, we avoid using generics everywhere in order |
3550 | // to keep code bloat down. |
3551 | let hirs: Vec<&Hir> = hirs.iter().map(|hir| hir.borrow()).collect(); |
3552 | let info = RegexInfo::new(config, &hirs); |
3553 | let strat = strategy::new(&info, &hirs)?; |
3554 | let pool = { |
3555 | let strat = Arc::clone(&strat); |
3556 | let create: CachePoolFn = Box::new(move || strat.create_cache()); |
3557 | Pool::new(create) |
3558 | }; |
3559 | Ok(Regex { imp: Arc::new(RegexI { strat, info }), pool }) |
3560 | } |
3561 | |
3562 | /// Configure the behavior of a `Regex`. |
3563 | /// |
3564 | /// This configuration controls non-syntax options related to the behavior |
3565 | /// of a `Regex`. This includes things like whether empty matches can split |
3566 | /// a codepoint, prefilters, line terminators and a long list of options |
3567 | /// for configuring which regex engines the meta regex engine will be able |
3568 | /// to use internally. |
3569 | /// |
3570 | /// # Example |
3571 | /// |
3572 | /// This example shows how to disable UTF-8 empty mode. This will permit |
3573 | /// empty matches to occur between the UTF-8 encoding of a codepoint. |
3574 | /// |
3575 | /// ``` |
3576 | /// use regex_automata::{meta::Regex, Match}; |
3577 | /// |
3578 | /// let re = Regex::new("")?; |
3579 | /// let got: Vec<Match> = re.find_iter("☃").collect(); |
3580 | /// // Matches only occur at the beginning and end of the snowman. |
3581 | /// assert_eq!(got, vec![ |
3582 | /// Match::must(0, 0..0), |
3583 | /// Match::must(0, 3..3), |
3584 | /// ]); |
3585 | /// |
3586 | /// let re = Regex::builder() |
3587 | /// .configure(Regex::config().utf8_empty(false)) |
3588 | ///     .build("")?; |
3589 | /// let got: Vec<Match> = re.find_iter("☃").collect(); |
3590 | /// // Matches now occur at every position! |
3591 | /// assert_eq!(got, vec![ |
3592 | /// Match::must(0, 0..0), |
3593 | /// Match::must(0, 1..1), |
3594 | /// Match::must(0, 2..2), |
3595 | /// Match::must(0, 3..3), |
3596 | /// ]); |
3597 | /// |
3598 | /// Ok::<(), Box<dyn std::error::Error>>(()) |
3599 | /// ``` |
3600 | pub fn configure(&mut self, config: Config) -> &mut Builder { |
3601 | self.config = self.config.overwrite(config); |
3602 | self |
3603 | } |
3604 | |
3605 | /// Configure the syntax options when parsing a pattern string while |
3606 | /// building a `Regex`. |
3607 | /// |
3608 | /// These options _only_ apply when [`Builder::build`] or [`Builder::build_many`] |
3609 | /// are used. The other build methods accept `Hir` values, which have |
3610 | /// already been parsed. |
3611 | /// |
3612 | /// # Example |
3613 | /// |
3614 | /// This example shows how to enable case insensitive mode. |
3615 | /// |
3616 | /// ``` |
3617 | /// use regex_automata::{meta::Regex, util::syntax, Match}; |
3618 | /// |
3619 | /// let re = Regex::builder() |
3620 | /// .syntax(syntax::Config::new().case_insensitive(true)) |
3621 | ///     .build(r"δ")?; |
3622 | /// assert_eq!(Some(Match::must(0, 0..2)), re.find(r"Δ")); |
3623 | /// |
3624 | /// Ok::<(), Box<dyn std::error::Error>>(()) |
3625 | /// ``` |
3626 | pub fn syntax( |
3627 | &mut self, |
3628 | config: crate::util::syntax::Config, |
3629 | ) -> &mut Builder { |
3630 | config.apply_ast(&mut self.ast); |
3631 | config.apply_hir(&mut self.hir); |
3632 | self |
3633 | } |
3634 | } |
3635 | |
3636 | #[cfg (test)] |
3637 | mod tests { |
3638 | use super::*; |
3639 | |
3640 | // I found this in the course of building out the benchmark suite for |
3641 | // rebar. |
3642 | #[test] |
3643 | fn regression_suffix_literal_count() { |
3644 | let _ = env_logger::try_init(); |
3645 | |
3646 | let re = Regex::new(r"[a-zA-Z]+ing").unwrap(); |
3647 | assert_eq!(1, re.find_iter("tingling").count()); |
3648 | } |
3649 | } |
3650 | |