regex.rs source code [crates/regex-automata-0.2.0/src/dfa/regex.rs]

1	/!*
2	A DFA-backed `Regex`.
3
4	This module provides [`Regex`], which is defined generically over the
5	[`Automaton`] trait. A `Regex` implements convenience routines you might have
6	come to expect, such as finding the start/end of a match and iterating over
7	all non-overlapping matches. This `Regex` type is limited in its capabilities
8	to what a DFA can provide. Therefore, APIs involving capturing groups, for
9	example, are not provided.
10
11	Internally, a `Regex` is composed of two DFAs. One is a "forward" DFA that
12	finds the end offset of a match, where as the other is a "reverse" DFA that
13	find the start offset of a match.
14
15	See the [parent module](crate::dfa) for examples.
16	*/
17
18	#[cfg(feature = "alloc")]
19	use alloc::vec::Vec;
20
21	use crate::{
22	dfa::automaton::{Automaton, OverlappingState},
23	util::prefilter::{self, Prefilter},
24	MatchError, MultiMatch,
25	};
26	#[cfg(feature = "alloc")]
27	use crate::{
28	dfa::{dense, error::Error, sparse},
29	nfa::thompson,
30	util::matchtypes::MatchKind,
31	};
32
33	// When the alloc feature is enabled, the regex type sets its A type parameter
34	// to default to an owned dense DFA. But without alloc, we set no default. This
35	// makes things a lot more convenient in the common case, since writing out the
36	// DFA types is pretty annoying.
37	//
38	// Since we have two different definitions but only want to write one doc
39	// string, we use a macro to capture the doc and other attributes once and then
40	// repeat them for each definition.
41	macro_rules! define_regex_type {
42	($(#[$doc:meta])*) => {
43	#[cfg(feature = "alloc")]
44	$(#[$doc])*
45	pub struct Regex<A = dense::OwnedDFA, P = prefilter::None> {
46	prefilter: Option<P>,
47	forward: A,
48	reverse: A,
49	utf8: bool,
50	}
51
52	#[cfg(not(feature = "alloc"))]
53	$(#[$doc])*
54	pub struct Regex<A, P = prefilter::None> {
55	prefilter: Option<P>,
56	forward: A,
57	reverse: A,
58	utf8: bool,
59	}
60	};
61	}
62
63	define_regex_type!(
64	/// A regular expression that uses deterministic finite automata for fast
65	/// searching.
66	///
67	/// A regular expression is comprised of two DFAs, a "forward" DFA and a
68	/// "reverse" DFA. The forward DFA is responsible for detecting the end of
69	/// a match while the reverse DFA is responsible for detecting the start
70	/// of a match. Thus, in order to find the bounds of any given match, a
71	/// forward search must first be run followed by a reverse search. A match
72	/// found by the forward DFA guarantees that the reverse DFA will also find
73	/// a match.
74	///
75	/// The type of the DFA used by a `Regex` corresponds to the `A` type
76	/// parameter, which must satisfy the [`Automaton`] trait. Typically,
77	/// `A` is either a [`dense::DFA`](crate::dfa::dense::DFA) or a
78	/// [`sparse::DFA`](crate::dfa::sparse::DFA), where dense DFAs use more
79	/// memory but search faster, while sparse DFAs use less memory but search
80	/// more slowly.
81	///
82	/// By default, a regex's automaton type parameter is set to
83	/// `dense::DFA<Vec<u32>>` when the `alloc` feature is enabled. For most
84	/// in-memory work loads, this is the most convenient type that gives the
85	/// best search performance. When the `alloc` feature is disabled, no
86	/// default type is used.
87	///
88	/// A `Regex` also has a `P` type parameter, which is used to select the
89	/// prefilter used during search. By default, no prefilter is enabled by
90	/// setting the type to default to [`prefilter::None`]. A prefilter can be
91	/// enabled by using the [`Regex::prefilter`] method.
92	///
93	/// # When should I use this?
94	///
95	/// Generally speaking, if you can afford the overhead of building a full
96	/// DFA for your regex, and you don't need things like capturing groups,
97	/// then this is a good choice if you're looking to optimize for matching
98	/// speed. Note however that its speed may be worse than a general purpose
99	/// regex engine if you don't select a good [prefilter].
100	///
101	/// # Earliest vs Leftmost vs Overlapping
102	///
103	/// The search routines exposed on a `Regex` reflect three different ways
104	/// of searching:
105	///
106	/// "earliest" means to stop as soon as a match has been detected.*
107	/// "leftmost" means to continue matching until the underlying*
108	/// automaton cannot advance. This reflects "standard" searching you
109	/// might be used to in other regex engines. e.g., This permits
110	/// non-greedy and greedy searching to work as you would expect.
111	/// "overlapping" means to find all possible matches, even if they*
112	/// overlap.
113	///
114	/// Generally speaking, when doing an overlapping search, you'll want to
115	/// build your regex DFAs with [`MatchKind::All`] semantics. Using
116	/// [`MatchKind::LeftmostFirst`] semantics with overlapping searches is
117	/// likely to lead to odd behavior since `LeftmostFirst` specifically omits
118	/// some matches that can never be reported due to its semantics.
119	///
120	/// The following example shows the differences between how these different
121	/// types of searches impact looking for matches of `[a-z]+` in the
122	/// haystack `abc`.
123	///
124	/// ```
125	/// use regex_automata::{dfa::{self, dense}, MatchKind, MultiMatch};
126	///
127	/// let pattern = r"[a-z]+";
128	/// let haystack = "abc".as_bytes();
129	///
130	/// // With leftmost-first semantics, we test "earliest" and "leftmost".
131	/// let re = dfa::regex::Builder::new()
132	/// .dense(dense::Config::new().match_kind(MatchKind::LeftmostFirst))
133	/// .build(pattern)?;
134	///
135	/// // "earliest" searching isn't impacted by greediness
136	/// let mut it = re.find_earliest_iter(haystack);
137	/// assert_eq!(Some(MultiMatch::must(0, 0, 1)), it.next());
138	/// assert_eq!(Some(MultiMatch::must(0, 1, 2)), it.next());
139	/// assert_eq!(Some(MultiMatch::must(0, 2, 3)), it.next());
140	/// assert_eq!(None, it.next());
141	///
142	/// // "leftmost" searching supports greediness (and non-greediness)
143	/// let mut it = re.find_leftmost_iter(haystack);
144	/// assert_eq!(Some(MultiMatch::must(0, 0, 3)), it.next());
145	/// assert_eq!(None, it.next());
146	///
147	/// // For overlapping, we want "all" match kind semantics.
148	/// let re = dfa::regex::Builder::new()
149	/// .dense(dense::Config::new().match_kind(MatchKind::All))
150	/// .build(pattern)?;
151	///
152	/// // In the overlapping search, we find all three possible matches
153	/// // starting at the beginning of the haystack.
154	/// let mut it = re.find_overlapping_iter(haystack);
155	/// assert_eq!(Some(MultiMatch::must(0, 0, 1)), it.next());
156	/// assert_eq!(Some(MultiMatch::must(0, 0, 2)), it.next());
157	/// assert_eq!(Some(MultiMatch::must(0, 0, 3)), it.next());
158	/// assert_eq!(None, it.next());
159	///
160	/// # Ok::<(), Box<dyn std::error::Error>>(())
161	/// ```
162	///
163	/// # Sparse DFAs
164	///
165	/// Since a `Regex` is generic over the [`Automaton`] trait, it can be
166	/// used with any kind of DFA. While this crate constructs dense DFAs by
167	/// default, it is easy enough to build corresponding sparse DFAs, and then
168	/// build a regex from them:
169	///
170	/// ```
171	/// use regex_automata::dfa::regex::Regex;
172	///
173	/// // First, build a regex that uses dense DFAs.
174	/// let dense_re = Regex::new("foo[0-9]+")?;
175	///
176	/// // Second, build sparse DFAs from the forward and reverse dense DFAs.
177	/// let fwd = dense_re.forward().to_sparse()?;
178	/// let rev = dense_re.reverse().to_sparse()?;
179	///
180	/// // Third, build a new regex from the constituent sparse DFAs.
181	/// let sparse_re = Regex::builder().build_from_dfas(fwd, rev);
182	///
183	/// // A regex that uses sparse DFAs can be used just like with dense DFAs.
184	/// assert_eq!(true, sparse_re.is_match(b"foo123"));
185	///
186	/// # Ok::<(), Box<dyn std::error::Error>>(())
187	/// ```
188	///
189	/// Alternatively, one can use a [`Builder`] to construct a sparse DFA
190	/// more succinctly. (Note though that dense DFAs are still constructed
191	/// first internally, and then converted to sparse DFAs, as in the example
192	/// above.)
193	///
194	/// ```
195	/// use regex_automata::dfa::regex::Regex;
196	///
197	/// let sparse_re = Regex::builder().build_sparse(r"foo[0-9]+")?;
198	/// // A regex that uses sparse DFAs can be used just like with dense DFAs.
199	/// assert!(sparse_re.is_match(b"foo123"));
200	///
201	/// # Ok::<(), Box<dyn std::error::Error>>(())
202	/// ```
203	///
204	/// # Fallibility
205	///
206	/// In non-default configurations, the DFAs generated in this module may
207	/// return an error during a search. (Currently, the only way this happens
208	/// is if quit bytes are added or Unicode word boundaries are heuristically
209	/// enabled, both of which are turned off by default.) For convenience, the
210	/// main search routines, like [`find_leftmost`](Regex::find_leftmost),
211	/// will panic if an error occurs. However, if you need to use DFAs
212	/// which may produce an error at search time, then there are fallible
213	/// equivalents of all search routines. For example, for `find_leftmost`,
214	/// its fallible analog is [`try_find_leftmost`](Regex::try_find_leftmost).
215	/// The routines prefixed with `try_` return `Result<Option<MultiMatch>,
216	/// MatchError>`, where as the infallible routines simply return
217	/// `Option<MultiMatch>`.
218	///
219	/// # Example
220	///
221	/// This example shows how to cause a search to terminate if it sees a
222	/// `\n` byte, and handle the error returned. This could be useful if, for
223	/// example, you wanted to prevent a user supplied pattern from matching
224	/// across a line boundary.
225	///
226	/// ```
227	/// use regex_automata::{dfa::{self, regex::Regex}, MatchError};
228	///
229	/// let re = Regex::builder()
230	/// .dense(dfa::dense::Config::new().quit(b'\n', true))
231	/// .build(r"foo\p{any}+bar")?;
232	///
233	/// let haystack = "foo\nbar".as_bytes();
234	/// // Normally this would produce a match, since \p{any} contains '\n'.
235	/// // But since we instructed the automaton to enter a quit state if a
236	/// // '\n' is observed, this produces a match error instead.
237	/// let expected = MatchError::Quit { byte: 0x0A, offset: 3 };
238	/// let got = re.try_find_leftmost(haystack).unwrap_err();
239	/// assert_eq!(expected, got);
240	///
241	/// # Ok::<(), Box<dyn std::error::Error>>(())
242	/// ```
243	#[derive(Clone, Debug)]
244	);
245
246	#[cfg(feature = "alloc")]
247	impl Regex {
248	/// Parse the given regular expression using the default configuration and
249	/// return the corresponding regex.
250	///
251	/// If you want a non-default configuration, then use the [`Builder`] to
252	/// set your own configuration.
253	///
254	/// # Example
255	///
256	/// ```
257	/// use regex_automata::{MultiMatch, dfa::regex::Regex};
258	///
259	/// let re = Regex::new("foo[0-9]+bar")?;
260	/// assert_eq!(
261	/// Some(MultiMatch::must(0, 3, 14)),
262	/// re.find_leftmost(b"zzzfoo12345barzzz"),
263	/// );
264	/// # Ok::<(), Box<dyn std::error::Error>>(())
265	/// ```
266	pub fn new(pattern: &str) -> Result<Regex, Error> {
267	Builder::new().build(pattern)
268	}
269
270	/// Like `new`, but parses multiple patterns into a single "regex set."
271	/// This similarly uses the default regex configuration.
272	///
273	/// # Example
274	///
275	/// ```
276	/// use regex_automata::{MultiMatch, dfa::regex::Regex};
277	///
278	/// let re = Regex::new_many(&["[a-z]+", "[0-9]+"])?;
279	///
280	/// let mut it = re.find_leftmost_iter(b"abc 1 foo 4567 0 quux");
281	/// assert_eq!(Some(MultiMatch::must(0, 0, 3)), it.next());
282	/// assert_eq!(Some(MultiMatch::must(1, 4, 5)), it.next());
283	/// assert_eq!(Some(MultiMatch::must(0, 6, 9)), it.next());
284	/// assert_eq!(Some(MultiMatch::must(1, 10, 14)), it.next());
285	/// assert_eq!(Some(MultiMatch::must(1, 15, 16)), it.next());
286	/// assert_eq!(Some(MultiMatch::must(0, 17, 21)), it.next());
287	/// assert_eq!(None, it.next());
288	/// # Ok::<(), Box<dyn std::error::Error>>(())
289	/// ```
290	pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<Regex, Error> {
291	Builder::new().build_many(patterns)
292	}
293	}
294
295	#[cfg(feature = "alloc")]
296	impl Regex<sparse::DFA<Vec<u8>>> {
297	/// Parse the given regular expression using the default configuration,
298	/// except using sparse DFAs, and return the corresponding regex.
299	///
300	/// If you want a non-default configuration, then use the [`Builder`] to
301	/// set your own configuration.
302	///
303	/// # Example
304	///
305	/// ```
306	/// use regex_automata::{MultiMatch, dfa::regex::Regex};
307	///
308	/// let re = Regex::new_sparse("foo[0-9]+bar")?;
309	/// assert_eq!(
310	/// Some(MultiMatch::must(0, 3, 14)),
311	/// re.find_leftmost(b"zzzfoo12345barzzz"),
312	/// );
313	/// # Ok::<(), Box<dyn std::error::Error>>(())
314	/// ```
315	pub fn new_sparse(
316	pattern: &str,
317	) -> Result<Regex<sparse::DFA<Vec<u8>>>, Error> {
318	Builder::new().build_sparse(pattern)
319	}
320
321	/// Like `new`, but parses multiple patterns into a single "regex set"
322	/// using sparse DFAs. This otherwise similarly uses the default regex
323	/// configuration.
324	///
325	/// # Example
326	///
327	/// ```
328	/// use regex_automata::{MultiMatch, dfa::regex::Regex};
329	///
330	/// let re = Regex::new_many_sparse(&["[a-z]+", "[0-9]+"])?;
331	///
332	/// let mut it = re.find_leftmost_iter(b"abc 1 foo 4567 0 quux");
333	/// assert_eq!(Some(MultiMatch::must(0, 0, 3)), it.next());
334	/// assert_eq!(Some(MultiMatch::must(1, 4, 5)), it.next());
335	/// assert_eq!(Some(MultiMatch::must(0, 6, 9)), it.next());
336	/// assert_eq!(Some(MultiMatch::must(1, 10, 14)), it.next());
337	/// assert_eq!(Some(MultiMatch::must(1, 15, 16)), it.next());
338	/// assert_eq!(Some(MultiMatch::must(0, 17, 21)), it.next());
339	/// assert_eq!(None, it.next());
340	/// # Ok::<(), Box<dyn std::error::Error>>(())
341	/// ```
342	pub fn new_many_sparse<P: AsRef<str>>(
343	patterns: &[P],
344	) -> Result<Regex<sparse::DFA<Vec<u8>>>, Error> {
345	Builder::new().build_many_sparse(patterns)
346	}
347	}
348
349	/// Convenience routines for regex construction.
350	#[cfg(feature = "alloc")]
351	impl Regex {
352	/// Return a default configuration for a `Regex`.
353	///
354	/// This is a convenience routine to avoid needing to import the `Config`
355	/// type when customizing the construction of a regex.
356	///
357	/// # Example
358	///
359	/// This example shows how to disable UTF-8 mode for `Regex` iteration.
360	/// When UTF-8 mode is disabled, the position immediately following an
361	/// empty match is where the next search begins, instead of the next
362	/// position of a UTF-8 encoded codepoint.
363	///
364	/// ```
365	/// use regex_automata::{dfa::regex::Regex, MultiMatch};
366	///
367	/// let re = Regex::builder()
368	/// .configure(Regex::config().utf8(false))
369	/// .build(r"")?;
370	/// let haystack = "a☃z".as_bytes();
371	/// let mut it = re.find_leftmost_iter(haystack);
372	/// assert_eq!(Some(MultiMatch::must(0, 0, 0)), it.next());
373	/// assert_eq!(Some(MultiMatch::must(0, 1, 1)), it.next());
374	/// assert_eq!(Some(MultiMatch::must(0, 2, 2)), it.next());
375	/// assert_eq!(Some(MultiMatch::must(0, 3, 3)), it.next());
376	/// assert_eq!(Some(MultiMatch::must(0, 4, 4)), it.next());
377	/// assert_eq!(Some(MultiMatch::must(0, 5, 5)), it.next());
378	/// assert_eq!(None, it.next());
379	///
380	/// # Ok::<(), Box<dyn std::error::Error>>(())
381	/// ```
382	pub fn config() -> Config {
383	Config::new()
384	}
385
386	/// Return a builder for configuring the construction of a `Regex`.
387	///
388	/// This is a convenience routine to avoid needing to import the
389	/// [`Builder`] type in common cases.
390	///
391	/// # Example
392	///
393	/// This example shows how to use the builder to disable UTF-8 mode
394	/// everywhere.
395	///
396	/// ```
397	/// use regex_automata::{
398	/// dfa::regex::Regex,
399	/// nfa::thompson,
400	/// MultiMatch, SyntaxConfig,
401	/// };
402	///
403	/// let re = Regex::builder()
404	/// .configure(Regex::config().utf8(false))
405	/// .syntax(SyntaxConfig::new().utf8(false))
406	/// .thompson(thompson::Config::new().utf8(false))
407	/// .build(r"foo(?-u:[^b])ar.")?;*
408	/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
409	/// let expected = Some(MultiMatch::must(0, 1, 9));
410	/// let got = re.find_leftmost(haystack);
411	/// assert_eq!(expected, got);
412	///
413	/// # Ok::<(), Box<dyn std::error::Error>>(())
414	/// ```
415	pub fn builder() -> Builder {
416	Builder::new()
417	}
418	}
419
420	/// Standard search routines for finding and iterating over matches.
421	impl<A: Automaton, P: Prefilter> Regex<A, P> {
422	/// Returns true if and only if this regex matches the given haystack.
423	///
424	/// This routine may short circuit if it knows that scanning future input
425	/// will never lead to a different result. In particular, if the underlying
426	/// DFA enters a match state or a dead state, then this routine will return
427	/// `true` or `false`, respectively, without inspecting any future input.
428	///
429	/// # Panics
430	///
431	/// If the underlying DFAs return an error, then this routine panics. This
432	/// only occurs in non-default configurations where quit bytes are used or
433	/// Unicode word boundaries are heuristically enabled.
434	///
435	/// The fallible version of this routine is
436	/// [`try_is_match`](Regex::try_is_match).
437	///
438	/// # Example
439	///
440	/// ```
441	/// use regex_automata::dfa::regex::Regex;
442	///
443	/// let re = Regex::new("foo[0-9]+bar")?;
444	/// assert_eq!(`true`, re.is_match(b"foo12345bar"));
445	/// assert_eq!(`false`, re.is_match(b"foobar"));
446	/// # Ok::<(), Box<dyn std::error::Error>>(())
447	/// ```
448	pub fn is_match(&self, haystack: &[u8]) -> bool {
449	self.is_match_at(haystack, `0`, haystack.len())
450	}
451
452	/// Returns the first position at which a match is found.
453	///
454	/// This routine stops scanning input in precisely the same circumstances
455	/// as `is_match`. The key difference is that this routine returns the
456	/// position at which it stopped scanning input if and only if a match
457	/// was found. If no match is found, then `None` is returned.
458	///
459	/// # Panics
460	///
461	/// If the underlying DFAs return an error, then this routine panics. This
462	/// only occurs in non-default configurations where quit bytes are used or
463	/// Unicode word boundaries are heuristically enabled.
464	///
465	/// The fallible version of this routine is
466	/// [`try_find_earliest`](Regex::try_find_earliest).
467	///
468	/// # Example
469	///
470	/// ```
471	/// use regex_automata::{MultiMatch, dfa::regex::Regex};
472	///
473	/// // Normally, the leftmost first match would greedily consume as many
474	/// // decimal digits as it could. But a match is detected as soon as one
475	/// // digit is seen.
476	/// let re = Regex::new("foo[0-9]+")?;
477	/// assert_eq!(
478	/// Some(MultiMatch::must(`0`, `0`, `4`)),
479	/// re.find_earliest(b"foo12345"),
480	/// );
481	///
482	/// // Normally, the end of the leftmost first match here would be 3,
483	/// // but the "earliest" match semantics detect a match earlier.
484	/// let re = Regex::new("abc\|a")?;
485	/// assert_eq!(Some(MultiMatch::must(`0`, `0`, `1`)), re.find_earliest(b"abc"));
486	/// # Ok::<(), Box<dyn std::error::Error>>(())
487	/// ```
488	pub fn find_earliest(&self, haystack: &[u8]) -> Option<MultiMatch> {
489	self.find_earliest_at(haystack, `0`, haystack.len())
490	}
491
492	/// Returns the start and end offset of the leftmost match. If no match
493	/// exists, then `None` is returned.
494	///
495	/// # Panics
496	///
497	/// If the underlying DFAs return an error, then this routine panics. This
498	/// only occurs in non-default configurations where quit bytes are used or
499	/// Unicode word boundaries are heuristically enabled.
500	///
501	/// The fallible version of this routine is
502	/// [`try_find_leftmost`](Regex::try_find_leftmost).
503	///
504	/// # Example
505	///
506	/// ```
507	/// use regex_automata::{MultiMatch, dfa::regex::Regex};
508	///
509	/// // Greediness is applied appropriately when compared to find_earliest.
510	/// let re = Regex::new("foo[0-9]+")?;
511	/// assert_eq!(
512	/// Some(MultiMatch::must(`0`, `3`, `11`)),
513	/// re.find_leftmost(b"zzzfoo12345zzz"),
514	/// );
515	///
516	/// // Even though a match is found after reading the first byte (`a`),
517	/// // the default leftmost-first match semantics demand that we find the
518	/// // earliest match that prefers earlier parts of the pattern over latter
519	/// // parts.
520	/// let re = Regex::new("abc\|a")?;
521	/// assert_eq!(Some(MultiMatch::must(`0`, `0`, `3`)), re.find_leftmost(b"abc"));
522	/// # Ok::<(), Box<dyn std::error::Error>>(())
523	/// ```
524	pub fn find_leftmost(&self, haystack: &[u8]) -> Option<MultiMatch> {
525	self.find_leftmost_at(haystack, `0`, haystack.len())
526	}
527
528	/// Search for the first overlapping match in `haystack`.
529	///
530	/// This routine is principally useful when searching for multiple patterns
531	/// on inputs where multiple patterns may match the same regions of text.
532	/// In particular, callers must preserve the automaton's search state from
533	/// prior calls so that the implementation knows where the last match
534	/// occurred and which pattern was reported.
535	///
536	/// # Panics
537	///
538	/// If the underlying DFAs return an error, then this routine panics. This
539	/// only occurs in non-default configurations where quit bytes are used or
540	/// Unicode word boundaries are heuristically enabled.
541	///
542	/// The fallible version of this routine is
543	/// [`try_find_overlapping`](Regex::try_find_overlapping).
544	///
545	/// # Example
546	///
547	/// This example shows how to run an overlapping search with multiple
548	/// regexes.
549	///
550	/// ```
551	/// use regex_automata::{dfa::{self, regex::Regex}, MatchKind, MultiMatch};
552	///
553	/// let re = Regex::builder()
554	/// .dense(dfa::dense::Config::new().match_kind(MatchKind::All))
555	/// .build_many(&[r"\w+$", r"\S+$"])?;
556	/// let haystack = "@foo".as_bytes();
557	/// let mut state = dfa::OverlappingState::start();
558	///
559	/// let expected = Some(MultiMatch::must(`1`, `0`, `4`));
560	/// let got = re.find_overlapping(haystack, &mut state);
561	/// assert_eq!(expected, got);
562	///
563	/// // The first pattern also matches at the same position, so re-running
564	/// // the search will yield another match. Notice also that the first
565	/// // pattern is returned after the second. This is because the second
566	/// // pattern begins its match before the first, is therefore an earlier
567	/// // match and is thus reported first.
568	/// let expected = Some(MultiMatch::must(`0`, `1`, `4`));
569	/// let got = re.find_overlapping(haystack, &mut state);
570	/// assert_eq!(expected, got);
571	///
572	/// # Ok::<(), Box<dyn std::error::Error>>(())
573	/// ```
574	pub fn find_overlapping(
575	&self,
576	haystack: &[u8],
577	state: &mut OverlappingState,
578	) -> Option<MultiMatch> {
579	self.find_overlapping_at(haystack, `0`, haystack.len(), state)
580	}
581
582	/// Returns an iterator over all non-overlapping "earliest" matches.
583	///
584	/// Match positions are reported as soon as a match is known to occur, even
585	/// if the standard leftmost match would be longer.
586	///
587	/// # Panics
588	///
589	/// If the underlying DFAs return an error during iteration, then iteration
590	/// panics. This only occurs in non-default configurations where quit bytes
591	/// are used or Unicode word boundaries are heuristically enabled.
592	///
593	/// The fallible version of this routine is
594	/// [`try_find_earliest_iter`](Regex::try_find_earliest_iter).
595	///
596	/// # Example
597	///
598	/// This example shows how to run an "earliest" iterator.
599	///
600	/// ```
601	/// use regex_automata::{dfa::regex::Regex, MultiMatch};
602	///
603	/// let re = Regex::new("[0-9]+")?;
604	/// let haystack = "123".as_bytes();
605	///
606	/// // Normally, a standard leftmost iterator would return a single
607	/// // match, but since "earliest" detects matches earlier, we get
608	/// // three matches.
609	/// let mut it = re.find_earliest_iter(haystack);
610	/// assert_eq!(Some(MultiMatch::must(`0`, `0`, `1`)), it.next());
611	/// assert_eq!(Some(MultiMatch::must(`0`, `1`, `2`)), it.next());
612	/// assert_eq!(Some(MultiMatch::must(`0`, `2`, `3`)), it.next());
613	/// assert_eq!(None, it.next());
614	///
615	/// # Ok::<(), Box<dyn std::error::Error>>(())
616	/// ```
617	pub fn find_earliest_iter<'r, 't>(
618	&'r self,
619	haystack: &'t [u8],
620	) -> FindEarliestMatches<'r, 't, A, P> {
621	FindEarliestMatches::new(self, haystack)
622	}
623
624	/// Returns an iterator over all non-overlapping leftmost matches in the
625	/// given bytes. If no match exists, then the iterator yields no elements.
626	///
627	/// This corresponds to the "standard" regex search iterator.
628	///
629	/// # Panics
630	///
631	/// If the underlying DFAs return an error during iteration, then iteration
632	/// panics. This only occurs in non-default configurations where quit bytes
633	/// are used or Unicode word boundaries are heuristically enabled.
634	///
635	/// The fallible version of this routine is
636	/// [`try_find_leftmost_iter`](Regex::try_find_leftmost_iter).
637	///
638	/// # Example
639	///
640	/// ```
641	/// use regex_automata::{MultiMatch, dfa::regex::Regex};
642	///
643	/// let re = Regex::new("foo[0-9]+")?;
644	/// let text = b"foo1 foo12 foo123";
645	/// let matches: Vec<MultiMatch> = re.find_leftmost_iter(text).collect();
646	/// assert_eq!(matches, vec![
647	/// MultiMatch::must(`0`, `0`, `4`),
648	/// MultiMatch::must(`0`, `5`, `10`),
649	/// MultiMatch::must(`0`, `11`, `17`),
650	/// ]);
651	/// # Ok::<(), Box<dyn std::error::Error>>(())
652	/// ```
653	pub fn find_leftmost_iter<'r, 't>(
654	&'r self,
655	haystack: &'t [u8],
656	) -> FindLeftmostMatches<'r, 't, A, P> {
657	FindLeftmostMatches::new(self, haystack)
658	}
659
660	/// Returns an iterator over all overlapping matches in the given haystack.
661	///
662	/// This routine is principally useful when searching for multiple patterns
663	/// on inputs where multiple patterns may match the same regions of text.
664	/// The iterator takes care of handling the overlapping state that must be
665	/// threaded through every search.
666	///
667	/// # Panics
668	///
669	/// If the underlying DFAs return an error during iteration, then iteration
670	/// panics. This only occurs in non-default configurations where quit bytes
671	/// are used or Unicode word boundaries are heuristically enabled.
672	///
673	/// The fallible version of this routine is
674	/// [`try_find_overlapping_iter`](Regex::try_find_overlapping_iter).
675	///
676	/// # Example
677	///
678	/// This example shows how to run an overlapping search with multiple
679	/// regexes.
680	///
681	/// ```
682	/// use regex_automata::{dfa::{self, regex::Regex}, MatchKind, MultiMatch};
683	///
684	/// let re = Regex::builder()
685	/// .dense(dfa::dense::Config::new().match_kind(MatchKind::All))
686	/// .build_many(&[r"\w+$", r"\S+$"])?;
687	/// let haystack = "@foo".as_bytes();
688	///
689	/// let mut it = re.find_overlapping_iter(haystack);
690	/// assert_eq!(Some(MultiMatch::must(`1`, `0`, `4`)), it.next());
691	/// assert_eq!(Some(MultiMatch::must(`0`, `1`, `4`)), it.next());
692	/// assert_eq!(None, it.next());
693	///
694	/// # Ok::<(), Box<dyn std::error::Error>>(())
695	/// ```
696	pub fn find_overlapping_iter<'r, 't>(
697	&'r self,
698	haystack: &'t [u8],
699	) -> FindOverlappingMatches<'r, 't, A, P> {
700	FindOverlappingMatches::new(self, haystack)
701	}
702	}
703
704	/// Lower level infallible search routines that permit controlling where
705	/// the search starts and ends in a particular sequence. This is useful for
706	/// executing searches that need to take surrounding context into account. This
707	/// is required for correctly implementing iteration because of look-around
708	/// operators (`^`, `$`, `\b`).
709	impl<A: Automaton, P: Prefilter> Regex<A, P> {
710	/// Returns true if and only if this regex matches the given haystack.
711	///
712	/// This routine may short circuit if it knows that scanning future input
713	/// will never lead to a different result. In particular, if the underlying
714	/// DFA enters a match state or a dead state, then this routine will return
715	/// `true` or `false`, respectively, without inspecting any future input.
716	///
717	/// # Searching a substring of the haystack
718	///
719	/// Being an "at" search routine, this permits callers to search a
720	/// substring of `haystack` by specifying a range in `haystack`.
721	/// Why expose this as an API instead of just asking callers to use
722	/// `&input[start..end]`? The reason is that regex matching often wants
723	/// to take the surrounding context into account in order to handle
724	/// look-around (`^`, `$` and `\b`).
725	///
726	/// # Panics
727	///
728	/// If the underlying DFAs return an error, then this routine panics. This
729	/// only occurs in non-default configurations where quit bytes are used or
730	/// Unicode word boundaries are heuristically enabled.
731	///
732	/// The fallible version of this routine is
733	/// [`try_is_match_at`](Regex::try_is_match_at).
734	pub fn is_match_at(
735	&self,
736	haystack: &[u8],
737	start: usize,
738	end: usize,
739	) -> bool {
740	self.try_is_match_at(haystack, start, end).unwrap()
741	}
742
743	/// Returns the first position at which a match is found.
744	///
745	/// This routine stops scanning input in precisely the same circumstances
746	/// as `is_match`. The key difference is that this routine returns the
747	/// position at which it stopped scanning input if and only if a match
748	/// was found. If no match is found, then `None` is returned.
749	///
750	/// # Searching a substring of the haystack
751	///
752	/// Being an "at" search routine, this permits callers to search a
753	/// substring of `haystack` by specifying a range in `haystack`.
754	/// Why expose this as an API instead of just asking callers to use
755	/// `&input[start..end]`? The reason is that regex matching often wants
756	/// to take the surrounding context into account in order to handle
757	/// look-around (`^`, `$` and `\b`).
758	///
759	/// This is useful when implementing an iterator over matches
760	/// within the same haystack, which cannot be done correctly by simply
761	/// providing a subslice of `haystack`.
762	///
763	/// # Panics
764	///
765	/// If the underlying DFAs return an error, then this routine panics. This
766	/// only occurs in non-default configurations where quit bytes are used or
767	/// Unicode word boundaries are heuristically enabled.
768	///
769	/// The fallible version of this routine is
770	/// [`try_find_earliest_at`](Regex::try_find_earliest_at).
771	pub fn find_earliest_at(
772	&self,
773	haystack: &[u8],
774	start: usize,
775	end: usize,
776	) -> Option<MultiMatch> {
777	self.try_find_earliest_at(haystack, start, end).unwrap()
778	}
779
780	/// Returns the same as `find_leftmost`, but starts the search at the given
781	/// offset.
782	///
783	/// The significance of the starting point is that it takes the surrounding
784	/// context into consideration. For example, if the DFA is anchored, then
785	/// a match can only occur when `start == 0`.
786	///
787	/// # Searching a substring of the haystack
788	///
789	/// Being an "at" search routine, this permits callers to search a
790	/// substring of `haystack` by specifying a range in `haystack`.
791	/// Why expose this as an API instead of just asking callers to use
792	/// `&input[start..end]`? The reason is that regex matching often wants
793	/// to take the surrounding context into account in order to handle
794	/// look-around (`^`, `$` and `\b`).
795	///
796	/// This is useful when implementing an iterator over matches within the
797	/// same haystack, which cannot be done correctly by simply providing a
798	/// subslice of `haystack`.
799	///
800	/// # Panics
801	///
802	/// If the underlying DFAs return an error, then this routine panics. This
803	/// only occurs in non-default configurations where quit bytes are used or
804	/// Unicode word boundaries are heuristically enabled.
805	///
806	/// The fallible version of this routine is
807	/// [`try_find_leftmost_at`](Regex::try_find_leftmost_at).
808	pub fn find_leftmost_at(
809	&self,
810	haystack: &[u8],
811	start: usize,
812	end: usize,
813	) -> Option<MultiMatch> {
814	self.try_find_leftmost_at(haystack, start, end).unwrap()
815	}
816
817	/// Search for the first overlapping match within a given range of
818	/// `haystack`.
819	///
820	/// This routine is principally useful when searching for multiple patterns
821	/// on inputs where multiple patterns may match the same regions of text.
822	/// In particular, callers must preserve the automaton's search state from
823	/// prior calls so that the implementation knows where the last match
824	/// occurred and which pattern was reported.
825	///
826	/// # Searching a substring of the haystack
827	///
828	/// Being an "at" search routine, this permits callers to search a
829	/// substring of `haystack` by specifying a range in `haystack`.
830	/// Why expose this as an API instead of just asking callers to use
831	/// `&input[start..end]`? The reason is that regex matching often wants
832	/// to take the surrounding context into account in order to handle
833	/// look-around (`^`, `$` and `\b`).
834	///
835	/// This is useful when implementing an iterator over matches
836	/// within the same haystack, which cannot be done correctly by simply
837	/// providing a subslice of `haystack`.
838	///
839	/// # Panics
840	///
841	/// If the underlying DFAs return an error, then this routine panics. This
842	/// only occurs in non-default configurations where quit bytes are used or
843	/// Unicode word boundaries are heuristically enabled.
844	///
845	/// The fallible version of this routine is
846	/// [`try_find_overlapping_at`](Regex::try_find_overlapping_at).
847	pub fn find_overlapping_at(
848	&self,
849	haystack: &[u8],
850	start: usize,
851	end: usize,
852	state: &mut OverlappingState,
853	) -> Option<MultiMatch> {
854	self.try_find_overlapping_at(haystack, start, end, state).unwrap()
855	}
856	}
857
858	/// Fallible search routines. These may return an error when the underlying
859	/// DFAs have been configured in a way that permits them to fail during a
860	/// search.
861	///
862	/// Errors during search only occur when the DFA has been explicitly
863	/// configured to do so, usually by specifying one or more "quit" bytes or by
864	/// heuristically enabling Unicode word boundaries.
865	///
866	/// Errors will never be returned using the default configuration. So these
867	/// fallible routines are only needed for particular configurations.
868	impl<A: Automaton, P: Prefilter> Regex<A, P> {
869	/// Returns true if and only if this regex matches the given haystack.
870	///
871	/// This routine may short circuit if it knows that scanning future input
872	/// will never lead to a different result. In particular, if the underlying
873	/// DFA enters a match state or a dead state, then this routine will return
874	/// `true` or `false`, respectively, without inspecting any future input.
875	///
876	/// # Errors
877	///
878	/// This routine only errors if the search could not complete. For
879	/// DFA-based regexes, this only occurs in a non-default configuration
880	/// where quit bytes are used or Unicode word boundaries are heuristically
881	/// enabled.
882	///
883	/// When a search cannot complete, callers cannot know whether a match
884	/// exists or not.
885	///
886	/// The infallible (panics on error) version of this routine is
887	/// [`is_match`](Regex::is_match).
888	pub fn try_is_match(&self, haystack: &[u8]) -> Result<bool, MatchError> {
889	self.try_is_match_at(haystack, `0`, haystack.len())
890	}
891
892	/// Returns the first position at which a match is found.
893	///
894	/// This routine stops scanning input in precisely the same circumstances
895	/// as `is_match`. The key difference is that this routine returns the
896	/// position at which it stopped scanning input if and only if a match
897	/// was found. If no match is found, then `None` is returned.
898	///
899	/// # Errors
900	///
901	/// This routine only errors if the search could not complete. For
902	/// DFA-based regexes, this only occurs in a non-default configuration
903	/// where quit bytes are used or Unicode word boundaries are heuristically
904	/// enabled.
905	///
906	/// When a search cannot complete, callers cannot know whether a match
907	/// exists or not.
908	///
909	/// The infallible (panics on error) version of this routine is
910	/// [`find_earliest`](Regex::find_earliest).
911	pub fn try_find_earliest(
912	&self,
913	haystack: &[u8],
914	) -> Result<Option<MultiMatch>, MatchError> {
915	self.try_find_earliest_at(haystack, `0`, haystack.len())
916	}
917
918	/// Returns the start and end offset of the leftmost match. If no match
919	/// exists, then `None` is returned.
920	///
921	/// # Errors
922	///
923	/// This routine only errors if the search could not complete. For
924	/// DFA-based regexes, this only occurs in a non-default configuration
925	/// where quit bytes are used or Unicode word boundaries are heuristically
926	/// enabled.
927	///
928	/// When a search cannot complete, callers cannot know whether a match
929	/// exists or not.
930	///
931	/// The infallible (panics on error) version of this routine is
932	/// [`find_leftmost`](Regex::find_leftmost).
933	pub fn try_find_leftmost(
934	&self,
935	haystack: &[u8],
936	) -> Result<Option<MultiMatch>, MatchError> {
937	self.try_find_leftmost_at(haystack, `0`, haystack.len())
938	}
939
940	/// Search for the first overlapping match in `haystack`.
941	///
942	/// This routine is principally useful when searching for multiple patterns
943	/// on inputs where multiple patterns may match the same regions of text.
944	/// In particular, callers must preserve the automaton's search state from
945	/// prior calls so that the implementation knows where the last match
946	/// occurred and which pattern was reported.
947	///
948	/// # Errors
949	///
950	/// This routine only errors if the search could not complete. For
951	/// DFA-based regexes, this only occurs in a non-default configuration
952	/// where quit bytes are used or Unicode word boundaries are heuristically
953	/// enabled.
954	///
955	/// When a search cannot complete, callers cannot know whether a match
956	/// exists or not.
957	///
958	/// The infallible (panics on error) version of this routine is
959	/// [`find_overlapping`](Regex::find_overlapping).
960	pub fn try_find_overlapping(
961	&self,
962	haystack: &[u8],
963	state: &mut OverlappingState,
964	) -> Result<Option<MultiMatch>, MatchError> {
965	self.try_find_overlapping_at(haystack, `0`, haystack.len(), state)
966	}
967
968	/// Returns an iterator over all non-overlapping "earliest" matches.
969	///
970	/// Match positions are reported as soon as a match is known to occur, even
971	/// if the standard leftmost match would be longer.
972	///
973	/// # Errors
974	///
975	/// This iterator only yields errors if the search could not complete. For
976	/// DFA-based regexes, this only occurs in a non-default configuration
977	/// where quit bytes are used or Unicode word boundaries are heuristically
978	/// enabled.
979	///
980	/// When a search cannot complete, callers cannot know whether a match
981	/// exists or not.
982	///
983	/// The infallible (panics on error) version of this routine is
984	/// [`find_earliest_iter`](Regex::find_earliest_iter).
985	pub fn try_find_earliest_iter<'r, 't>(
986	&'r self,
987	haystack: &'t [u8],
988	) -> TryFindEarliestMatches<'r, 't, A, P> {
989	TryFindEarliestMatches::new(self, haystack)
990	}
991
992	/// Returns an iterator over all non-overlapping leftmost matches in the
993	/// given bytes. If no match exists, then the iterator yields no elements.
994	///
995	/// This corresponds to the "standard" regex search iterator.
996	///
997	/// # Errors
998	///
999	/// This iterator only yields errors if the search could not complete. For
1000	/// DFA-based regexes, this only occurs in a non-default configuration
1001	/// where quit bytes are used or Unicode word boundaries are heuristically
1002	/// enabled.
1003	///
1004	/// When a search cannot complete, callers cannot know whether a match
1005	/// exists or not.
1006	///
1007	/// The infallible (panics on error) version of this routine is
1008	/// [`find_leftmost_iter`](Regex::find_leftmost_iter).
1009	pub fn try_find_leftmost_iter<'r, 't>(
1010	&'r self,
1011	haystack: &'t [u8],
1012	) -> TryFindLeftmostMatches<'r, 't, A, P> {
1013	TryFindLeftmostMatches::new(self, haystack)
1014	}
1015
1016	/// Returns an iterator over all overlapping matches in the given haystack.
1017	///
1018	/// This routine is principally useful when searching for multiple patterns
1019	/// on inputs where multiple patterns may match the same regions of text.
1020	/// The iterator takes care of handling the overlapping state that must be
1021	/// threaded through every search.
1022	///
1023	/// # Errors
1024	///
1025	/// This iterator only yields errors if the search could not complete. For
1026	/// DFA-based regexes, this only occurs in a non-default configuration
1027	/// where quit bytes are used or Unicode word boundaries are heuristically
1028	/// enabled.
1029	///
1030	/// When a search cannot complete, callers cannot know whether a match
1031	/// exists or not.
1032	///
1033	/// The infallible (panics on error) version of this routine is
1034	/// [`find_overlapping_iter`](Regex::find_overlapping_iter).
1035	pub fn try_find_overlapping_iter<'r, 't>(
1036	&'r self,
1037	haystack: &'t [u8],
1038	) -> TryFindOverlappingMatches<'r, 't, A, P> {
1039	TryFindOverlappingMatches::new(self, haystack)
1040	}
1041	}
1042
1043	/// Lower level fallible search routines that permit controlling where the
1044	/// search starts and ends in a particular sequence.
1045	impl<A: Automaton, P: Prefilter> Regex<A, P> {
1046	/// Returns true if and only if this regex matches the given haystack.
1047	///
1048	/// This routine may short circuit if it knows that scanning future input
1049	/// will never lead to a different result. In particular, if the underlying
1050	/// DFA enters a match state or a dead state, then this routine will return
1051	/// `true` or `false`, respectively, without inspecting any future input.
1052	///
1053	/// # Searching a substring of the haystack
1054	///
1055	/// Being an "at" search routine, this permits callers to search a
1056	/// substring of `haystack` by specifying a range in `haystack`.
1057	/// Why expose this as an API instead of just asking callers to use
1058	/// `&input[start..end]`? The reason is that regex matching often wants
1059	/// to take the surrounding context into account in order to handle
1060	/// look-around (`^`, `$` and `\b`).
1061	///
1062	/// # Errors
1063	///
1064	/// This routine only errors if the search could not complete. For
1065	/// DFA-based regexes, this only occurs in a non-default configuration
1066	/// where quit bytes are used, Unicode word boundaries are heuristically
1067	/// enabled or limits are set on the number of times the lazy DFA's cache
1068	/// may be cleared.
1069	///
1070	/// When a search cannot complete, callers cannot know whether a match
1071	/// exists or not.
1072	///
1073	/// The infallible (panics on error) version of this routine is
1074	/// [`is_match_at`](Regex::is_match_at).
1075	pub fn try_is_match_at(
1076	&self,
1077	haystack: &[u8],
1078	start: usize,
1079	end: usize,
1080	) -> Result<bool, MatchError> {
1081	self.forward()
1082	.find_earliest_fwd_at(
1083	self.scanner().as_mut(),
1084	None,
1085	haystack,
1086	start,
1087	end,
1088	)
1089	.map(\|x\| x.is_some())
1090	}
1091
1092	/// Returns the first position at which a match is found.
1093	///
1094	/// This routine stops scanning input in precisely the same circumstances
1095	/// as `is_match`. The key difference is that this routine returns the
1096	/// position at which it stopped scanning input if and only if a match
1097	/// was found. If no match is found, then `None` is returned.
1098	///
1099	/// # Searching a substring of the haystack
1100	///
1101	/// Being an "at" search routine, this permits callers to search a
1102	/// substring of `haystack` by specifying a range in `haystack`.
1103	/// Why expose this as an API instead of just asking callers to use
1104	/// `&input[start..end]`? The reason is that regex matching often wants
1105	/// to take the surrounding context into account in order to handle
1106	/// look-around (`^`, `$` and `\b`).
1107	///
1108	/// This is useful when implementing an iterator over matches
1109	/// within the same haystack, which cannot be done correctly by simply
1110	/// providing a subslice of `haystack`.
1111	///
1112	/// # Errors
1113	///
1114	/// This routine only errors if the search could not complete. For
1115	/// DFA-based regexes, this only occurs in a non-default configuration
1116	/// where quit bytes are used or Unicode word boundaries are heuristically
1117	/// enabled.
1118	///
1119	/// When a search cannot complete, callers cannot know whether a match
1120	/// exists or not.
1121	///
1122	/// The infallible (panics on error) version of this routine is
1123	/// [`find_earliest_at`](Regex::find_earliest_at).
1124	pub fn try_find_earliest_at(
1125	&self,
1126	haystack: &[u8],
1127	start: usize,
1128	end: usize,
1129	) -> Result<Option<MultiMatch>, MatchError> {
1130	self.try_find_earliest_at_imp(
1131	self.scanner().as_mut(),
1132	haystack,
1133	start,
1134	end,
1135	)
1136	}
1137
1138	/// The implementation of "earliest" searching, where a prefilter scanner
1139	/// may be given.
1140	fn try_find_earliest_at_imp(
1141	&self,
1142	pre: Option<&mut prefilter::Scanner>,
1143	haystack: &[u8],
1144	start: usize,
1145	end: usize,
1146	) -> Result<Option<MultiMatch>, MatchError> {
1147	// N.B. We use `&&A` here to call `Automaton` methods, which ensures
1148	// that we always use the `impl Automaton for &A` for calling methods.
1149	// Since this is the usual way that automata are used, this helps
1150	// reduce the number of monomorphized copies of the search code.
1151	let (fwd, rev) = (self.forward(), self.reverse());
1152	let end = match (&fwd)
1153	.find_earliest_fwd_at(pre, None, haystack, start, end)?
1154	{
1155	None => return Ok(None),
1156	Some(end) => end,
1157	};
1158	// N.B. The only time we need to tell the reverse searcher the pattern
1159	// to match is in the overlapping case, since it's ambiguous. In the
1160	// leftmost case, I have tentatively convinced myself that it isn't
1161	// necessary and the reverse search will always find the same pattern
1162	// to match as the forward search. But I lack a rigorous proof.
1163	let start = (&rev)
1164	.find_earliest_rev_at(None, haystack, start, end.offset())?
1165	.expect("reverse search must match if forward search does");
1166	assert_eq!(
1167	start.pattern(),
1168	end.pattern(),
1169	"forward and reverse search must match same pattern"
1170	);
1171	assert!(start.offset() <= end.offset());
1172	Ok(Some(MultiMatch::new(end.pattern(), start.offset(), end.offset())))
1173	}
1174
1175	/// Returns the start and end offset of the leftmost match. If no match
1176	/// exists, then `None` is returned.
1177	///
1178	/// # Searching a substring of the haystack
1179	///
1180	/// Being an "at" search routine, this permits callers to search a
1181	/// substring of `haystack` by specifying a range in `haystack`.
1182	/// Why expose this as an API instead of just asking callers to use
1183	/// `&input[start..end]`? The reason is that regex matching often wants
1184	/// to take the surrounding context into account in order to handle
1185	/// look-around (`^`, `$` and `\b`).
1186	///
1187	/// This is useful when implementing an iterator over matches
1188	/// within the same haystack, which cannot be done correctly by simply
1189	/// providing a subslice of `haystack`.
1190	///
1191	/// # Errors
1192	///
1193	/// This routine only errors if the search could not complete. For
1194	/// DFA-based regexes, this only occurs in a non-default configuration
1195	/// where quit bytes are used or Unicode word boundaries are heuristically
1196	/// enabled.
1197	///
1198	/// When a search cannot complete, callers cannot know whether a match
1199	/// exists or not.
1200	///
1201	/// The infallible (panics on error) version of this routine is
1202	/// [`find_leftmost_at`](Regex::find_leftmost_at).
1203	pub fn try_find_leftmost_at(
1204	&self,
1205	haystack: &[u8],
1206	start: usize,
1207	end: usize,
1208	) -> Result<Option<MultiMatch>, MatchError> {
1209	self.try_find_leftmost_at_imp(
1210	self.scanner().as_mut(),
1211	haystack,
1212	start,
1213	end,
1214	)
1215	}
1216
1217	/// The implementation of leftmost searching, where a prefilter scanner
1218	/// may be given.
1219	fn try_find_leftmost_at_imp(
1220	&self,
1221	scanner: Option<&mut prefilter::Scanner>,
1222	haystack: &[u8],
1223	start: usize,
1224	end: usize,
1225	) -> Result<Option<MultiMatch>, MatchError> {
1226	// N.B. We use `&&A` here to call `Automaton` methods, which ensures
1227	// that we always use the `impl Automaton for &A` for calling methods.
1228	// Since this is the usual way that automata are used, this helps
1229	// reduce the number of monomorphized copies of the search code.
1230	let (fwd, rev) = (self.forward(), self.reverse());
1231	let end = match (&fwd)
1232	.find_leftmost_fwd_at(scanner, None, haystack, start, end)?
1233	{
1234	None => return Ok(None),
1235	Some(end) => end,
1236	};
1237	// N.B. The only time we need to tell the reverse searcher the pattern
1238	// to match is in the overlapping case, since it's ambiguous. In the
1239	// leftmost case, I have tentatively convinced myself that it isn't
1240	// necessary and the reverse search will always find the same pattern
1241	// to match as the forward search. But I lack a rigorous proof. Why not
1242	// just provide the pattern anyway? Well, if it is needed, then leaving
1243	// it out gives us a chance to find a witness.
1244	let start = (&rev)
1245	.find_leftmost_rev_at(None, haystack, start, end.offset())?
1246	.expect("reverse search must match if forward search does");
1247	assert_eq!(
1248	start.pattern(),
1249	end.pattern(),
1250	"forward and reverse search must match same pattern",
1251	);
1252	assert!(start.offset() <= end.offset());
1253	Ok(Some(MultiMatch::new(end.pattern(), start.offset(), end.offset())))
1254	}
1255
1256	/// Search for the first overlapping match within a given range of
1257	/// `haystack`.
1258	///
1259	/// This routine is principally useful when searching for multiple patterns
1260	/// on inputs where multiple patterns may match the same regions of text.
1261	/// In particular, callers must preserve the automaton's search state from
1262	/// prior calls so that the implementation knows where the last match
1263	/// occurred and which pattern was reported.
1264	///
1265	/// # Searching a substring of the haystack
1266	///
1267	/// Being an "at" search routine, this permits callers to search a
1268	/// substring of `haystack` by specifying a range in `haystack`.
1269	/// Why expose this as an API instead of just asking callers to use
1270	/// `&input[start..end]`? The reason is that regex matching often wants
1271	/// to take the surrounding context into account in order to handle
1272	/// look-around (`^`, `$` and `\b`).
1273	///
1274	/// This is useful when implementing an iterator over matches
1275	/// within the same haystack, which cannot be done correctly by simply
1276	/// providing a subslice of `haystack`.
1277	///
1278	/// # Errors
1279	///
1280	/// This routine only errors if the search could not complete. For
1281	/// DFA-based regexes, this only occurs in a non-default configuration
1282	/// where quit bytes are used or Unicode word boundaries are heuristically
1283	/// enabled.
1284	///
1285	/// When a search cannot complete, callers cannot know whether a match
1286	/// exists or not.
1287	///
1288	/// The infallible (panics on error) version of this routine is
1289	/// [`find_overlapping_at`](Regex::find_overlapping_at).
1290	pub fn try_find_overlapping_at(
1291	&self,
1292	haystack: &[u8],
1293	start: usize,
1294	end: usize,
1295	state: &mut OverlappingState,
1296	) -> Result<Option<MultiMatch>, MatchError> {
1297	self.try_find_overlapping_at_imp(
1298	self.scanner().as_mut(),
1299	haystack,
1300	start,
1301	end,
1302	state,
1303	)
1304	}
1305
1306	/// The implementation of overlapping search at a given range in
1307	/// `haystack`, where `scanner` is a prefilter (if active) and `state` is
1308	/// the current state of the search.
1309	fn try_find_overlapping_at_imp(
1310	&self,
1311	scanner: Option<&mut prefilter::Scanner>,
1312	haystack: &[u8],
1313	start: usize,
1314	end: usize,
1315	state: &mut OverlappingState,
1316	) -> Result<Option<MultiMatch>, MatchError> {
1317	// N.B. We use `&&A` here to call `Automaton` methods, which ensures
1318	// that we always use the `impl Automaton for &A` for calling methods.
1319	// Since this is the usual way that automata are used, this helps
1320	// reduce the number of monomorphized copies of the search code.
1321	let (fwd, rev) = (self.forward(), self.reverse());
1322	// TODO: Decide whether it's worth making this assert work. It doesn't
1323	// work currently because 'has_starts_for_each_pattern' isn't on the
1324	// Automaton trait. Without this assert, we still get a panic, but it's
1325	// a bit more inscrutable.
1326	// assert!(
1327	// rev.has_starts_for_each_pattern(),
1328	// "overlapping searches require that the reverse DFA is \
1329	// compiled with the 'starts_for_each_pattern' option",
1330	// );
1331	let end = match (&fwd).find_overlapping_fwd_at(
1332	scanner, None, haystack, start, end, state,
1333	)? {
1334	None => return Ok(None),
1335	Some(end) => end,
1336	};
1337	// Unlike the leftmost cases, the reverse overlapping search may match
1338	// a different pattern than the forward search. See test failures when
1339	// using `None` instead of `Some(end.pattern())` below. Thus, we must
1340	// run our reverse search using the pattern that matched in the forward
1341	// direction.
1342	let start = (&rev)
1343	.find_leftmost_rev_at(
1344	Some(end.pattern()),
1345	haystack,
1346	`0`,
1347	end.offset(),
1348	)?
1349	.expect("reverse search must match if forward search does");
1350	assert!(start.offset() <= end.offset());
1351	assert_eq!(start.pattern(), end.pattern());
1352	Ok(Some(MultiMatch::new(end.pattern(), start.offset(), end.offset())))
1353	}
1354	}
1355
1356	/// Non-search APIs for querying information about the regex and setting a
1357	/// prefilter.
1358	impl<A: Automaton, P: Prefilter> Regex<A, P> {
1359	/// Attach the given prefilter to this regex.
1360	pub fn with_prefilter<Q: Prefilter>(self, prefilter: Q) -> Regex<A, Q> {
1361	Regex {
1362	prefilter: Some(prefilter),
1363	forward: self.forward,
1364	reverse: self.reverse,
1365	utf8: self.utf8,
1366	}
1367	}
1368
1369	/// Remove any prefilter from this regex.
1370	pub fn without_prefilter(self) -> Regex<A> {
1371	Regex {
1372	prefilter: None,
1373	forward: self.forward,
1374	reverse: self.reverse,
1375	utf8: self.utf8,
1376	}
1377	}
1378
1379	/// Return the underlying DFA responsible for forward matching.
1380	///
1381	/// This is useful for accessing the underlying DFA and converting it to
1382	/// some other format or size. See the [`Builder::build_from_dfas`] docs
1383	/// for an example of where this might be useful.
1384	pub fn forward(&self) -> &A {
1385	&self.forward
1386	}
1387
1388	/// Return the underlying DFA responsible for reverse matching.
1389	///
1390	/// This is useful for accessing the underlying DFA and converting it to
1391	/// some other format or size. See the [`Builder::build_from_dfas`] docs
1392	/// for an example of where this might be useful.
1393	pub fn reverse(&self) -> &A {
1394	&self.reverse
1395	}
1396
1397	/// Returns the total number of patterns matched by this regex.
1398	///
1399	/// # Example
1400	///
1401	/// ```
1402	/// use regex_automata::{MultiMatch, dfa::regex::Regex};
1403	///
1404	/// let re = Regex::new_many(&[r"[a-z]+", r"[0-9]+", r"\w+"])?;
1405	/// assert_eq!(`3`, re.pattern_count());
1406	/// # Ok::<(), Box<dyn std::error::Error>>(())
1407	/// ```
1408	pub fn pattern_count(&self) -> usize {
1409	assert_eq!(
1410	self.forward().pattern_count(),
1411	self.reverse().pattern_count()
1412	);
1413	self.forward().pattern_count()
1414	}
1415
1416	/// Convenience function for returning this regex's prefilter as a trait
1417	/// object.
1418	///
1419	/// If this regex doesn't have a prefilter, then `None` is returned.
1420	pub fn prefilter(&self) -> Option<&dyn Prefilter> {
1421	match self.prefilter {
1422	None => None,
1423	Some(ref x) => Some(&*x),
1424	}
1425	}
1426
1427	/// Convenience function for returning a prefilter scanner.
1428	fn scanner(&self) -> Option<prefilter::Scanner> {
1429	self.prefilter().map(prefilter::Scanner::new)
1430	}
1431	}
1432
1433	/// An iterator over all non-overlapping earliest matches for a particular
1434	/// infallible search.
1435	///
1436	/// The iterator yields a [`MultiMatch`] value until no more matches could be
1437	/// found. If the underlying search returns an error, then this panics.
1438	///
1439	/// `A` is the type used to represent the underlying DFAs used by the regex,
1440	/// while `P` is the type of prefilter used, if any. The lifetime variables are
1441	/// as follows:
1442	///
1443	/// `'r` is the lifetime of the regular expression itself.*
1444	/// `'t` is the lifetime of the text being searched.*
1445	#[derive(Clone, Debug)]
1446	pub struct FindEarliestMatches<'r, 't, A, P>(
1447	TryFindEarliestMatches<'r, 't, A, P>,
1448	);
1449
1450	impl<'r, 't, A: Automaton, P: Prefilter> FindEarliestMatches<'r, 't, A, P> {
1451	fn new(
1452	re: &'r Regex<A, P>,
1453	text: &'t [u8],
1454	) -> FindEarliestMatches<'r, 't, A, P> {
1455	FindEarliestMatches(TryFindEarliestMatches::new(re, text))
1456	}
1457	}
1458
1459	impl<'r, 't, A: Automaton, P: Prefilter> Iterator
1460	for FindEarliestMatches<'r, 't, A, P>
1461	{
1462	type Item = MultiMatch;
1463
1464	fn next(&mut self) -> Option<MultiMatch> {
1465	next_unwrap(self.0.next())
1466	}
1467	}
1468
1469	/// An iterator over all non-overlapping leftmost matches for a particular
1470	/// infallible search.
1471	///
1472	/// The iterator yields a [`MultiMatch`] value until no more matches could be
1473	/// found. If the underlying search returns an error, then this panics.
1474	///
1475	/// `A` is the type used to represent the underlying DFAs used by the regex,
1476	/// while `P` is the type of prefilter used, if any. The lifetime variables are
1477	/// as follows:
1478	///
1479	/// `'r` is the lifetime of the regular expression itself.*
1480	/// `'t` is the lifetime of the text being searched.*
1481	#[derive(Clone, Debug)]
1482	pub struct FindLeftmostMatches<'r, 't, A, P>(
1483	TryFindLeftmostMatches<'r, 't, A, P>,
1484	);
1485
1486	impl<'r, 't, A: Automaton, P: Prefilter> FindLeftmostMatches<'r, 't, A, P> {
1487	fn new(
1488	re: &'r Regex<A, P>,
1489	text: &'t [u8],
1490	) -> FindLeftmostMatches<'r, 't, A, P> {
1491	FindLeftmostMatches(TryFindLeftmostMatches::new(re, text))
1492	}
1493	}
1494
1495	impl<'r, 't, A: Automaton, P: Prefilter> Iterator
1496	for FindLeftmostMatches<'r, 't, A, P>
1497	{
1498	type Item = MultiMatch;
1499
1500	fn next(&mut self) -> Option<MultiMatch> {
1501	next_unwrap(self.0.next())
1502	}
1503	}
1504
1505	/// An iterator over all overlapping matches for a particular infallible
1506	/// search.
1507	///
1508	/// The iterator yields a [`MultiMatch`] value until no more matches could be
1509	/// found. If the underlying search returns an error, then this panics.
1510	///
1511	/// `A` is the type used to represent the underlying DFAs used by the regex,
1512	/// while `P` is the type of prefilter used, if any. The lifetime variables are
1513	/// as follows:
1514	///
1515	/// `'r` is the lifetime of the regular expression itself.*
1516	/// `'t` is the lifetime of the text being searched.*
1517	#[derive(Clone, Debug)]
1518	pub struct FindOverlappingMatches<'r, 't, A: Automaton, P>(
1519	TryFindOverlappingMatches<'r, 't, A, P>,
1520	);
1521
1522	impl<'r, 't, A: Automaton, P: Prefilter> FindOverlappingMatches<'r, 't, A, P> {
1523	fn new(
1524	re: &'r Regex<A, P>,
1525	text: &'t [u8],
1526	) -> FindOverlappingMatches<'r, 't, A, P> {
1527	FindOverlappingMatches(TryFindOverlappingMatches::new(re, text))
1528	}
1529	}
1530
1531	impl<'r, 't, A: Automaton, P: Prefilter> Iterator
1532	for FindOverlappingMatches<'r, 't, A, P>
1533	{
1534	type Item = MultiMatch;
1535
1536	fn next(&mut self) -> Option<MultiMatch> {
1537	next_unwrap(self.0.next())
1538	}
1539	}
1540
1541	/// An iterator over all non-overlapping earliest matches for a particular
1542	/// fallible search.
1543	///
1544	/// The iterator yields a [`MultiMatch`] value until no more matches could be
1545	/// found.
1546	///
1547	/// `A` is the type used to represent the underlying DFAs used by the regex,
1548	/// while `P` is the type of prefilter used, if any. The lifetime variables are
1549	/// as follows:
1550	///
1551	/// `'r` is the lifetime of the regular expression itself.*
1552	/// `'t` is the lifetime of the text being searched.*
1553	#[derive(Clone, Debug)]
1554	pub struct TryFindEarliestMatches<'r, 't, A, P> {
1555	re: &'r Regex<A, P>,
1556	scanner: Option<prefilter::Scanner<'r>>,
1557	text: &'t [u8],
1558	last_end: usize,
1559	last_match: Option<usize>,
1560	}
1561
1562	impl<'r, 't, A: Automaton, P: Prefilter> TryFindEarliestMatches<'r, 't, A, P> {
1563	fn new(
1564	re: &'r Regex<A, P>,
1565	text: &'t [u8],
1566	) -> TryFindEarliestMatches<'r, 't, A, P> {
1567	let scanner: Option> = re.scanner();
1568	TryFindEarliestMatches {
1569	re,
1570	scanner,
1571	text,
1572	last_end: `0`,
1573	last_match: None,
1574	}
1575	}
1576	}
1577
1578	impl<'r, 't, A: Automaton, P: Prefilter> Iterator
1579	for TryFindEarliestMatches<'r, 't, A, P>
1580	{
1581	type Item = Result<MultiMatch, MatchError>;
1582
1583	fn next(&mut self) -> Option<Result<MultiMatch, MatchError>> {
1584	if self.last_end > self.text.len() {
1585	return None;
1586	}
1587	let result = self.re.try_find_earliest_at_imp(
1588	self.scanner.as_mut(),
1589	self.text,
1590	self.last_end,
1591	self.text.len(),
1592	);
1593	let m = match result {
1594	Err(err) => return Some(Err(err)),
1595	Ok(None) => return None,
1596	Ok(Some(m)) => m,
1597	};
1598	if m.is_empty() {
1599	// This is an empty match. To ensure we make progress, start
1600	// the next search at the smallest possible starting position
1601	// of the next match following this one.
1602	self.last_end = if self.re.utf8 {
1603	crate::util::next_utf8(self.text, m.end())
1604	} else {
1605	m.end() + `1`
1606	};
1607	// Don't accept empty matches immediately following a match.
1608	// Just move on to the next match.
1609	if Some(m.end()) == self.last_match {
1610	return self.next();
1611	}
1612	} else {
1613	self.last_end = m.end();
1614	}
1615	self.last_match = Some(m.end());
1616	Some(Ok(m))
1617	}
1618	}
1619
1620	/// An iterator over all non-overlapping leftmost matches for a particular
1621	/// fallible search.
1622	///
1623	/// The iterator yields a [`MultiMatch`] value until no more matches could be
1624	/// found.
1625	///
1626	/// `A` is the type used to represent the underlying DFAs used by the regex,
1627	/// while `P` is the type of prefilter used, if any. The lifetime variables are
1628	/// as follows:
1629	///
1630	/// `'r` is the lifetime of the regular expression itself.*
1631	/// `'t` is the lifetime of the text being searched.*
1632	#[derive(Clone, Debug)]
1633	pub struct TryFindLeftmostMatches<'r, 't, A, P> {
1634	re: &'r Regex<A, P>,
1635	scanner: Option<prefilter::Scanner<'r>>,
1636	text: &'t [u8],
1637	last_end: usize,
1638	last_match: Option<usize>,
1639	}
1640
1641	impl<'r, 't, A: Automaton, P: Prefilter> TryFindLeftmostMatches<'r, 't, A, P> {
1642	fn new(
1643	re: &'r Regex<A, P>,
1644	text: &'t [u8],
1645	) -> TryFindLeftmostMatches<'r, 't, A, P> {
1646	let scanner: Option> = re.scanner();
1647	TryFindLeftmostMatches {
1648	re,
1649	scanner,
1650	text,
1651	last_end: `0`,
1652	last_match: None,
1653	}
1654	}
1655	}
1656
1657	impl<'r, 't, A: Automaton, P: Prefilter> Iterator
1658	for TryFindLeftmostMatches<'r, 't, A, P>
1659	{
1660	type Item = Result<MultiMatch, MatchError>;
1661
1662	fn next(&mut self) -> Option<Result<MultiMatch, MatchError>> {
1663	if self.last_end > self.text.len() {
1664	return None;
1665	}
1666	let result = self.re.try_find_leftmost_at_imp(
1667	self.scanner.as_mut(),
1668	self.text,
1669	self.last_end,
1670	self.text.len(),
1671	);
1672	let m = match result {
1673	Err(err) => return Some(Err(err)),
1674	Ok(None) => return None,
1675	Ok(Some(m)) => m,
1676	};
1677	if m.is_empty() {
1678	// This is an empty match. To ensure we make progress, start
1679	// the next search at the smallest possible starting position
1680	// of the next match following this one.
1681	self.last_end = if self.re.utf8 {
1682	crate::util::next_utf8(self.text, m.end())
1683	} else {
1684	m.end() + `1`
1685	};
1686	// Don't accept empty matches immediately following a match.
1687	// Just move on to the next match.
1688	if Some(m.end()) == self.last_match {
1689	return self.next();
1690	}
1691	} else {
1692	self.last_end = m.end();
1693	}
1694	self.last_match = Some(m.end());
1695	Some(Ok(m))
1696	}
1697	}
1698
1699	/// An iterator over all overlapping matches for a particular fallible search.
1700	///
1701	/// The iterator yields a [`MultiMatch`] value until no more matches could be
1702	/// found.
1703	///
1704	/// `A` is the type used to represent the underlying DFAs used by the regex,
1705	/// while `P` is the type of prefilter used, if any. The lifetime variables are
1706	/// as follows:
1707	///
1708	/// `'r` is the lifetime of the regular expression itself.*
1709	/// `'t` is the lifetime of the text being searched.*
1710	#[derive(Clone, Debug)]
1711	pub struct TryFindOverlappingMatches<'r, 't, A: Automaton, P> {
1712	re: &'r Regex<A, P>,
1713	scanner: Option<prefilter::Scanner<'r>>,
1714	text: &'t [u8],
1715	last_end: usize,
1716	state: OverlappingState,
1717	}
1718
1719	impl<'r, 't, A: Automaton, P: Prefilter>
1720	TryFindOverlappingMatches<'r, 't, A, P>
1721	{
1722	fn new(
1723	re: &'r Regex<A, P>,
1724	text: &'t [u8],
1725	) -> TryFindOverlappingMatches<'r, 't, A, P> {
1726	let scanner: Option> = re.scanner();
1727	TryFindOverlappingMatches {
1728	re,
1729	scanner,
1730	text,
1731	last_end: `0`,
1732	state: OverlappingState::start(),
1733	}
1734	}
1735	}
1736
1737	impl<'r, 't, A: Automaton, P: Prefilter> Iterator
1738	for TryFindOverlappingMatches<'r, 't, A, P>
1739	{
1740	type Item = Result<MultiMatch, MatchError>;
1741
1742	fn next(&mut self) -> Option<Result<MultiMatch, MatchError>> {
1743	if self.last_end > self.text.len() {
1744	return None;
1745	}
1746	let result = self.re.try_find_overlapping_at_imp(
1747	self.scanner.as_mut(),
1748	self.text,
1749	self.last_end,
1750	self.text.len(),
1751	&mut self.state,
1752	);
1753	let m = match result {
1754	Err(err) => return Some(Err(err)),
1755	Ok(None) => return None,
1756	Ok(Some(m)) => m,
1757	};
1758	// Unlike the non-overlapping case, we're OK with empty matches at this
1759	// level. In particular, the overlapping search algorithm is itself
1760	// responsible for ensuring that progress is always made.
1761	self.last_end = m.end();
1762	Some(Ok(m))
1763	}
1764	}
1765
1766	/// The configuration used for compiling a DFA-backed regex.
1767	///
1768	/// A regex configuration is a simple data object that is typically used with
1769	/// [`Builder::configure`].
1770	#[cfg(feature = "alloc")]
1771	#[derive(Clone, Copy, Debug, Default)]
1772	pub struct Config {
1773	utf8: Option<bool>,
1774	}
1775
1776	#[cfg(feature = "alloc")]
1777	impl Config {
1778	/// Return a new default regex compiler configuration.
1779	pub fn new() -> Config {
1780	Config::default()
1781	}
1782
1783	/// Whether to enable UTF-8 mode or not.
1784	///
1785	/// When UTF-8 mode is enabled (the default) and an empty match is seen,
1786	/// the iterators on [`Regex`] will always start the next search at the
1787	/// next UTF-8 encoded codepoint when searching valid UTF-8. When UTF-8
1788	/// mode is disabled, such searches are begun at the next byte offset.
1789	///
1790	/// If this mode is enabled and invalid UTF-8 is given to search, then
1791	/// behavior is unspecified.
1792	///
1793	/// Generally speaking, one should enable this when
1794	/// [`SyntaxConfig::utf8`](crate::SyntaxConfig::utf8)
1795	/// and
1796	/// [`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8)
1797	/// are enabled, and disable it otherwise.
1798	///
1799	/// # Example
1800	///
1801	/// This example demonstrates the differences between when this option is
1802	/// enabled and disabled. The differences only arise when the regex can
1803	/// return matches of length zero.
1804	///
1805	/// In this first snippet, we show the results when UTF-8 mode is disabled.
1806	///
1807	/// ```
1808	/// use regex_automata::{dfa::regex::Regex, MultiMatch};
1809	///
1810	/// let re = Regex::builder()
1811	/// .configure(Regex::config().utf8(false))
1812	/// .build(r"")?;
1813	/// let haystack = "a☃z".as_bytes();
1814	/// let mut it = re.find_leftmost_iter(haystack);
1815	/// assert_eq!(Some(MultiMatch::must(0, 0, 0)), it.next());
1816	/// assert_eq!(Some(MultiMatch::must(0, 1, 1)), it.next());
1817	/// assert_eq!(Some(MultiMatch::must(0, 2, 2)), it.next());
1818	/// assert_eq!(Some(MultiMatch::must(0, 3, 3)), it.next());
1819	/// assert_eq!(Some(MultiMatch::must(0, 4, 4)), it.next());
1820	/// assert_eq!(Some(MultiMatch::must(0, 5, 5)), it.next());
1821	/// assert_eq!(None, it.next());
1822	///
1823	/// # Ok::<(), Box<dyn std::error::Error>>(())
1824	/// ```
1825	///
1826	/// And in this snippet, we execute the same search on the same haystack,
1827	/// but with UTF-8 mode enabled. Notice that byte offsets that would
1828	/// otherwise split the encoding of `☃` are not returned.
1829	///
1830	/// ```
1831	/// use regex_automata::{dfa::regex::Regex, MultiMatch};
1832	///
1833	/// let re = Regex::builder()
1834	/// .configure(Regex::config().utf8(true))
1835	/// .build(r"")?;
1836	/// let haystack = "a☃z".as_bytes();
1837	/// let mut it = re.find_leftmost_iter(haystack);
1838	/// assert_eq!(Some(MultiMatch::must(0, 0, 0)), it.next());
1839	/// assert_eq!(Some(MultiMatch::must(0, 1, 1)), it.next());
1840	/// assert_eq!(Some(MultiMatch::must(0, 4, 4)), it.next());
1841	/// assert_eq!(Some(MultiMatch::must(0, 5, 5)), it.next());
1842	/// assert_eq!(None, it.next());
1843	///
1844	/// # Ok::<(), Box<dyn std::error::Error>>(())
1845	/// ```
1846	pub fn utf8(mut self, yes: bool) -> Config {
1847	self.utf8 = Some(yes);
1848	self
1849	}
1850
1851	/// Returns true if and only if this configuration has UTF-8 mode enabled.
1852	///
1853	/// When UTF-8 mode is enabled and an empty match is seen, the iterators on
1854	/// [`Regex`] will always start the next search at the next UTF-8 encoded
1855	/// codepoint. When UTF-8 mode is disabled, such searches are begun at the
1856	/// next byte offset.
1857	pub fn get_utf8(&self) -> bool {
1858	self.utf8.unwrap_or(`true`)
1859	}
1860
1861	/// Overwrite the default configuration such that the options in `o` are
1862	/// always used. If an option in `o` is not set, then the corresponding
1863	/// option in `self` is used. If it's not set in `self` either, then it
1864	/// remains not set.
1865	pub(crate) fn overwrite(self, o: Config) -> Config {
1866	Config { utf8: o.utf8.or(self.utf8) }
1867	}
1868	}
1869
1870	/// A builder for a regex based on deterministic finite automatons.
1871	///
1872	/// This builder permits configuring options for the syntax of a pattern, the
1873	/// NFA construction, the DFA construction and finally the regex searching
1874	/// itself. This builder is different from a general purpose regex builder in
1875	/// that it permits fine grain configuration of the construction process. The
1876	/// trade off for this is complexity, and the possibility of setting a
1877	/// configuration that might not make sense. For example, there are three
1878	/// different UTF-8 modes:
1879	///
1880	/// [`SyntaxConfig::utf8`](crate::SyntaxConfig::utf8) controls whether the*
1881	/// pattern itself can contain sub-expressions that match invalid UTF-8.
1882	/// [`nfa::thompson::Config::utf8`](crate::nfa::thompson::Config::utf8)*
1883	/// controls whether the implicit unanchored prefix added to the NFA can
1884	/// match through invalid UTF-8 or not.
1885	/// [`Config::utf8`] controls how the regex iterators themselves advance*
1886	/// the starting position of the next search when a match with zero length is
1887	/// found.
1888	///
1889	/// Generally speaking, callers will want to either enable all of these or
1890	/// disable all of these.
1891	///
1892	/// Internally, building a regex requires building two DFAs, where one is
1893	/// responsible for finding the end of a match and the other is responsible
1894	/// for finding the start of a match. If you only need to detect whether
1895	/// something matched, or only the end of a match, then you should use a
1896	/// [`dense::Builder`] to construct a single DFA, which is cheaper than
1897	/// building two DFAs.
1898	///
1899	/// # Build methods
1900	///
1901	/// This builder has a few "build" methods. In general, it's the result of
1902	/// combining the following parameters:
1903	///
1904	/// Building one or many regexes.*
1905	/// Building a regex with dense or sparse DFAs.*
1906	///
1907	/// The simplest "build" method is [`Builder::build`]. It accepts a single
1908	/// pattern and builds a dense DFA using `usize` for the state identifier
1909	/// representation.
1910	///
1911	/// The most general "build" method is [`Builder::build_many`], which permits
1912	/// building a regex that searches for multiple patterns simultaneously while
1913	/// using a specific state identifier representation.
1914	///
1915	/// The most flexible "build" method, but hardest to use, is
1916	/// [`Builder::build_from_dfas`]. This exposes the fact that a [`Regex`] is
1917	/// just a pair of DFAs, and this method allows you to specify those DFAs
1918	/// exactly.
1919	///
1920	/// # Example
1921	///
1922	/// This example shows how to disable UTF-8 mode in the syntax, the NFA and
1923	/// the regex itself. This is generally what you want for matching on
1924	/// arbitrary bytes.
1925	///
1926	/// ```
1927	/// use regex_automata::{
1928	/// dfa::regex::Regex, nfa::thompson, MultiMatch, SyntaxConfig
1929	/// };
1930	///
1931	/// let re = Regex::builder()
1932	/// .configure(Regex::config().utf8(false))
1933	/// .syntax(SyntaxConfig::new().utf8(false))
1934	/// .thompson(thompson::Config::new().utf8(false))
1935	/// .build(r"foo(?-u:[^b])ar.")?;*
1936	/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
1937	/// let expected = Some(MultiMatch::must(0, 1, 9));
1938	/// let got = re.find_leftmost(haystack);
1939	/// assert_eq!(expected, got);
1940	/// // Notice that `(?-u:[^b])` matches invalid UTF-8,
1941	/// // but the subsequent `.` does not! Disabling UTF-8*
1942	/// // on the syntax permits this. Notice also that the
1943	/// // search was unanchored and skipped over invalid UTF-8.
1944	/// // Disabling UTF-8 on the Thompson NFA permits this.
1945	/// //
1946	/// // N.B. This example does not show the impact of
1947	/// // disabling UTF-8 mode on Config, since that
1948	/// // only impacts regexes that can produce matches of
1949	/// // length 0.
1950	/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]);
1951	///
1952	/// # Ok::<(), Box<dyn std::error::Error>>(())
1953	/// ```
1954	#[cfg(feature = "alloc")]
1955	#[derive(Clone, Debug)]
1956	pub struct Builder {
1957	config: Config,
1958	dfa: dense::Builder,
1959	}
1960
1961	#[cfg(feature = "alloc")]
1962	impl Builder {
1963	/// Create a new regex builder with the default configuration.
1964	pub fn new() -> Builder {
1965	Builder { config: Config::default(), dfa: dense::Builder::new() }
1966	}
1967
1968	/// Build a regex from the given pattern.
1969	///
1970	/// If there was a problem parsing or compiling the pattern, then an error
1971	/// is returned.
1972	pub fn build(&self, pattern: &str) -> Result<Regex, Error> {
1973	self.build_many(&[pattern])
1974	}
1975
1976	/// Build a regex from the given pattern using sparse DFAs.
1977	///
1978	/// If there was a problem parsing or compiling the pattern, then an error
1979	/// is returned.
1980	pub fn build_sparse(
1981	&self,
1982	pattern: &str,
1983	) -> Result<Regex<sparse::DFA<Vec<u8>>>, Error> {
1984	self.build_many_sparse(&[pattern])
1985	}
1986
1987	/// Build a regex from the given patterns.
1988	pub fn build_many<P: AsRef<str>>(
1989	&self,
1990	patterns: &[P],
1991	) -> Result<Regex, Error> {
1992	let forward = self.dfa.build_many(patterns)?;
1993	let reverse = self
1994	.dfa
1995	.clone()
1996	.configure(
1997	dense::Config::new()
1998	.anchored(`true`)
1999	.match_kind(MatchKind::All)
2000	.starts_for_each_pattern(`true`),
2001	)
2002	.thompson(thompson::Config::new().reverse(`true`))
2003	.build_many(patterns)?;
2004	Ok(self.build_from_dfas(forward, reverse))
2005	}
2006
2007	/// Build a sparse regex from the given patterns.
2008	pub fn build_many_sparse<P: AsRef<str>>(
2009	&self,
2010	patterns: &[P],
2011	) -> Result<Regex<sparse::DFA<Vec<u8>>>, Error> {
2012	let re = self.build_many(patterns)?;
2013	let forward = re.forward().to_sparse()?;
2014	let reverse = re.reverse().to_sparse()?;
2015	Ok(self.build_from_dfas(forward, reverse))
2016	}
2017
2018	/// Build a regex from its component forward and reverse DFAs.
2019	///
2020	/// This is useful when deserializing a regex from some arbitrary
2021	/// memory region. This is also useful for building regexes from other
2022	/// types of DFAs.
2023	///
2024	/// If you're building the DFAs from scratch instead of building new DFAs
2025	/// from other DFAs, then you'll need to make sure that the reverse DFA is
2026	/// configured correctly to match the intended semantics. Namely:
2027	///
2028	/// It should be anchored.*
2029	/// It should use [`MatchKind::All`] semantics.*
2030	/// It should match in reverse.*
2031	/// It should have anchored start states compiled for each pattern.*
2032	/// Otherwise, its configuration should match the forward DFA.*
2033	///
2034	/// If these conditions are satisfied, then behavior of searches is
2035	/// unspecified.
2036	///
2037	/// Note that when using this constructor, only the configuration from
2038	/// [`Config`] is applied. The only configuration settings on this builder
2039	/// only apply when the builder owns the construction of the DFAs
2040	/// themselves.
2041	///
2042	/// # Example
2043	///
2044	/// This example is a bit a contrived. The usual use of these methods
2045	/// would involve serializing `initial_re` somewhere and then deserializing
2046	/// it later to build a regex. But in this case, we do everything in
2047	/// memory.
2048	///
2049	/// ```
2050	/// use regex_automata::dfa::regex::Regex;
2051	///
2052	/// let initial_re = Regex::new("foo[0-9]+")?;
2053	/// assert_eq!(true, initial_re.is_match(b"foo123"));
2054	///
2055	/// let (fwd, rev) = (initial_re.forward(), initial_re.reverse());
2056	/// let re = Regex::builder().build_from_dfas(fwd, rev);
2057	/// assert_eq!(true, re.is_match(b"foo123"));
2058	/// # Ok::<(), Box<dyn std::error::Error>>(())
2059	/// ```
2060	///
2061	/// This example shows how to build a `Regex` that uses sparse DFAs instead
2062	/// of dense DFAs without using one of the convenience `build_sparse`
2063	/// routines:
2064	///
2065	/// ```
2066	/// use regex_automata::dfa::regex::Regex;
2067	///
2068	/// let initial_re = Regex::new("foo[0-9]+")?;
2069	/// assert_eq!(true, initial_re.is_match(b"foo123"));
2070	///
2071	/// let fwd = initial_re.forward().to_sparse()?;
2072	/// let rev = initial_re.reverse().to_sparse()?;
2073	/// let re = Regex::builder().build_from_dfas(fwd, rev);
2074	/// assert_eq!(true, re.is_match(b"foo123"));
2075	/// # Ok::<(), Box<dyn std::error::Error>>(())
2076	/// ```
2077	pub fn build_from_dfas<A: Automaton>(
2078	&self,
2079	forward: A,
2080	reverse: A,
2081	) -> Regex<A> {
2082	let utf8 = self.config.get_utf8();
2083	Regex { prefilter: None, forward, reverse, utf8 }
2084	}
2085
2086	/// Apply the given regex configuration options to this builder.
2087	pub fn configure(&mut self, config: Config) -> &mut Builder {
2088	self.config = self.config.overwrite(config);
2089	self
2090	}
2091
2092	/// Set the syntax configuration for this builder using
2093	/// [`SyntaxConfig`](crate::SyntaxConfig).
2094	///
2095	/// This permits setting things like case insensitivity, Unicode and multi
2096	/// line mode.
2097	pub fn syntax(
2098	&mut self,
2099	config: crate::util::syntax::SyntaxConfig,
2100	) -> &mut Builder {
2101	self.dfa.syntax(config);
2102	self
2103	}
2104
2105	/// Set the Thompson NFA configuration for this builder using
2106	/// [`nfa::thompson::Config`](thompson::Config).
2107	///
2108	/// This permits setting things like whether additional time should be
2109	/// spent shrinking the size of the NFA.
2110	pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
2111	self.dfa.thompson(config);
2112	self
2113	}
2114
2115	/// Set the dense DFA compilation configuration for this builder using
2116	/// [`dense::Config`](dense::Config).
2117	///
2118	/// This permits setting things like whether the underlying DFAs should
2119	/// be minimized.
2120	pub fn dense(&mut self, config: dense::Config) -> &mut Builder {
2121	self.dfa.configure(config);
2122	self
2123	}
2124	}
2125
2126	#[cfg(feature = "alloc")]
2127	impl Default for Builder {
2128	fn default() -> Builder {
2129	Builder::new()
2130	}
2131	}
2132
2133	#[inline(always)]
2134	fn next_unwrap(
2135	item: Option<Result<MultiMatch, MatchError>>,
2136	) -> Option<MultiMatch> {
2137	match item {
2138	None => None,
2139	Some(Ok(m: MultiMatch)) => Some(m),
2140	Some(Err(err: MatchError)) => panic!(
2141	"unexpected regex search error: {}`\n`\
2142	to handle search errors, use try_ methods",
2143	err,
2144	),
2145	}
2146	}
2147