search.rs source code [crates/regex-automata-0.4.6/src/util/search.rs]

1	/!*
2	Types and routines that support the search APIs of most regex engines.
3
4	This sub-module isn't exposed directly, but rather, its contents are exported
5	at the crate root due to the universality of most of the types and routines in
6	this module.
7	*/
8
9	use core::ops::{Range, RangeBounds};
10
11	use crate::util::{escape::DebugByte, primitives::PatternID, utf8};
12
13	/// The parameters for a regex search including the haystack to search.
14	///
15	/// It turns out that regex searches have a few parameters, and in most cases,
16	/// those parameters have defaults that work in the vast majority of cases.
17	/// This `Input` type exists to make that common case seamnless while also
18	/// providing an avenue for changing the parameters of a search. In particular,
19	/// this type enables doing so without a combinatorial explosion of different
20	/// methods and/or superfluous parameters in the common cases.
21	///
22	/// An `Input` permits configuring the following things:
23	///
24	/// Search only a substring of a haystack, while taking the broader context*
25	/// into account for resolving look-around assertions.
26	/// Indicating whether to search for all patterns in a regex, or to*
27	/// only search for one pattern in particular.
28	/// Whether to perform an anchored on unanchored search.*
29	/// Whether to report a match as early as possible.*
30	///
31	/// All of these parameters, except for the haystack, have sensible default
32	/// values. This means that the minimal search configuration is simply a call
33	/// to [`Input::new`] with your haystack. Setting any other parameter is
34	/// optional.
35	///
36	/// Moreover, for any `H` that implements `AsRef<[u8]>`, there exists a
37	/// `From<H> for Input` implementation. This is useful because many of the
38	/// search APIs in this crate accept an `Into<Input>`. This means you can
39	/// provide string or byte strings to these routines directly, and they'll
40	/// automatically get converted into an `Input` for you.
41	///
42	/// The lifetime parameter `'h` refers to the lifetime of the haystack.
43	///
44	/// # Organization
45	///
46	/// The API of `Input` is split into a few different parts:
47	///
48	/// A builder-like API that transforms a `Input` by value. Examples:*
49	/// [`Input::span`] and [`Input::anchored`].
50	/// A setter API that permits mutating parameters in place. Examples:*
51	/// [`Input::set_span`] and [`Input::set_anchored`].
52	/// A getter API that permits retrieving any of the search parameters.*
53	/// Examples: [`Input::get_span`] and [`Input::get_anchored`].
54	/// A few convenience getter routines that don't conform to the above naming*
55	/// pattern due to how common they are. Examples: [`Input::haystack`],
56	/// [`Input::start`] and [`Input::end`].
57	/// Miscellaneous predicates and other helper routines that are useful*
58	/// in some contexts. Examples: [`Input::is_char_boundary`].
59	///
60	/// A `Input` exposes so much because it is meant to be used by both callers of
61	/// regex engines _and_ implementors of regex engines. A constraining factor is
62	/// that regex engines should accept a `&Input` as its lowest level API, which
63	/// means that implementors should only use the "getter" APIs of a `Input`.
64	///
65	/// # Valid bounds and search termination
66	///
67	/// An `Input` permits setting the bounds of a search via either
68	/// [`Input::span`] or [`Input::range`]. The bounds set must be valid, or
69	/// else a panic will occur. Bounds are valid if and only if:
70	///
71	/// The bounds represent a valid range into the input's haystack.*
72	/// * or the end bound is a valid ending bound for the haystack and
73	/// the start bound is exactly one greater than the start bound.
74	///
75	/// In the latter case, [`Input::is_done`] will return true and indicates any
76	/// search receiving such an input should immediately return with no match.
77	///
78	/// Note that while `Input` is used for reverse searches in this crate, the
79	/// `Input::is_done` predicate assumes a forward search. Because unsigned
80	/// offsets are used internally, there is no way to tell from only the offsets
81	/// whether a reverse search is done or not.
82	///
83	/// # Regex engine support
84	///
85	/// Any regex engine accepting an `Input` must support at least the following
86	/// things:
87	///
88	/// Searching a `&[u8]` for matches.*
89	/// Searching a substring of `&[u8]` for a match, such that any match*
90	/// reported must appear entirely within that substring.
91	/// For a forwards search, a match should never be reported when*
92	/// [`Input::is_done`] returns true. (For reverse searches, termination should
93	/// be handled outside of `Input`.)
94	///
95	/// Supporting other aspects of an `Input` are optional, but regex engines
96	/// should handle aspects they don't support gracefully. How this is done is
97	/// generally up to the regex engine. This crate generally treats unsupported
98	/// anchored modes as an error to report for example, but for simplicity, in
99	/// the meta regex engine, trying to search with an invalid pattern ID just
100	/// results in no match being reported.
101	#[derive(Clone)]
102	pub struct Input<'h> {
103	haystack: &'h [u8],
104	span: Span,
105	anchored: Anchored,
106	earliest: bool,
107	}
108
109	impl<'h> Input<'h> {
110	/// Create a new search configuration for the given haystack.
111	#[inline]
112	pub fn new<H: ?Sized + AsRef<[u8]>>(haystack: &'h H) -> Input<'h> {
113	// Perform only one call to `haystack.as_ref()` to protect from incorrect
114	// implementations that return different values from multiple calls.
115	// This is important because there's code that relies on `span` not being
116	// out of bounds with respect to the stored `haystack`.
117	let haystack = haystack.as_ref();
118	Input {
119	haystack,
120	span: Span { start: `0`, end: haystack.len() },
121	anchored: Anchored::No,
122	earliest: `false`,
123	}
124	}
125
126	/// Set the span for this search.
127	///
128	/// This routine does not panic if the span given is not a valid range for
129	/// this search's haystack. If this search is run with an invalid range,
130	/// then the most likely outcome is that the actual search execution will
131	/// panic.
132	///
133	/// This routine is generic over how a span is provided. While
134	/// a [`Span`] may be given directly, one may also provide a
135	/// `std::ops::Range<usize>`. To provide anything supported by range
136	/// syntax, use the [`Input::range`] method.
137	///
138	/// The default span is the entire haystack.
139	///
140	/// Note that [`Input::range`] overrides this method and vice versa.
141	///
142	/// # Panics
143	///
144	/// This panics if the given span does not correspond to valid bounds in
145	/// the haystack or the termination of a search.
146	///
147	/// # Example
148	///
149	/// This example shows how the span of the search can impact whether a
150	/// match is reported or not. This is particularly relevant for look-around
151	/// operators, which might take things outside of the span into account
152	/// when determining whether they match.
153	///
154	/// ```
155	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
156	/// use regex_automata::{
157	/// nfa::thompson::pikevm::PikeVM,
158	/// Match, Input,
159	/// };
160	///
161	/// // Look for 'at', but as a distinct word.
162	/// let re = PikeVM::new(r"\bat\b")?;
163	/// let mut cache = re.create_cache();
164	/// let mut caps = re.create_captures();
165	///
166	/// // Our haystack contains 'at', but not as a distinct word.
167	/// let haystack = "batter";
168	///
169	/// // A standard search finds nothing, as expected.
170	/// let input = Input::new(haystack);
171	/// re.search(&mut cache, &input, &mut caps);
172	/// assert_eq!(None, caps.get_match());
173	///
174	/// // But if we wanted to search starting at position '1', we might
175	/// // slice the haystack. If we do this, it's impossible for the \b
176	/// // anchors to take the surrounding context into account! And thus,
177	/// // a match is produced.
178	/// let input = Input::new(&haystack[`1`..`3`]);
179	/// re.search(&mut cache, &input, &mut caps);
180	/// assert_eq!(Some(Match::must(`0`, `0`..`2`)), caps.get_match());
181	///
182	/// // But if we specify the span of the search instead of slicing the
183	/// // haystack, then the regex engine can "see" outside of the span
184	/// // and resolve the anchors correctly.
185	/// let input = Input::new(haystack).span(`1`..`3`);
186	/// re.search(&mut cache, &input, &mut caps);
187	/// assert_eq!(None, caps.get_match());
188	///
189	/// # Ok::<(), Box<dyn std::error::Error>>(())
190	/// ```
191	///
192	/// This may seem a little ham-fisted, but this scenario tends to come up
193	/// if some other regex engine found the match span and now you need to
194	/// re-process that span to look for capturing groups. (e.g., Run a faster
195	/// DFA first, find a match, then run the PikeVM on just the match span to
196	/// resolve capturing groups.) In order to implement that sort of logic
197	/// correctly, you need to set the span on the search instead of slicing
198	/// the haystack directly.
199	///
200	/// The other advantage of using this routine to specify the bounds of the
201	/// search is that the match offsets are still reported in terms of the
202	/// original haystack. For example, the second search in the example above
203	/// reported a match at position `0`, even though `at` starts at offset
204	/// `1` because we sliced the haystack.
205	#[inline]
206	pub fn span<S: Into<Span>>(mut self, span: S) -> Input<'h> {
207	self.set_span(span);
208	self
209	}
210
211	/// Like `Input::span`, but accepts any range instead.
212	///
213	/// This routine does not panic if the range given is not a valid range for
214	/// this search's haystack. If this search is run with an invalid range,
215	/// then the most likely outcome is that the actual search execution will
216	/// panic.
217	///
218	/// The default range is the entire haystack.
219	///
220	/// Note that [`Input::span`] overrides this method and vice versa.
221	///
222	/// # Panics
223	///
224	/// This routine will panic if the given range could not be converted
225	/// to a valid [`Range`]. For example, this would panic when given
226	/// `0..=usize::MAX` since it cannot be represented using a half-open
227	/// interval in terms of `usize`.
228	///
229	/// This also panics if the given range does not correspond to valid bounds
230	/// in the haystack or the termination of a search.
231	///
232	/// # Example
233	///
234	/// ```
235	/// use regex_automata::Input;
236	///
237	/// let input = Input::new("foobar");
238	/// assert_eq!(`0`..`6`, input.get_range());
239	///
240	/// let input = Input::new("foobar").range(`2`..=`4`);
241	/// assert_eq!(`2`..`5`, input.get_range());
242	/// ```
243	#[inline]
244	pub fn range<R: RangeBounds<usize>>(mut self, range: R) -> Input<'h> {
245	self.set_range(range);
246	self
247	}
248
249	/// Sets the anchor mode of a search.
250	///
251	/// When a search is anchored (so that's [`Anchored::Yes`] or
252	/// [`Anchored::Pattern`]), a match must begin at the start of a search.
253	/// When a search is not anchored (that's [`Anchored::No`]), regex engines
254	/// will behave as if the pattern started with a `(?s-u:.)?`. This prefix*
255	/// permits a match to appear anywhere.
256	///
257	/// By default, the anchored mode is [`Anchored::No`].
258	///
259	/// WARNING:* this is subtly different than using a `^` at the start of*
260	/// your regex. A `^` forces a regex to match exclusively at the start of
261	/// a haystack, regardless of where you begin your search. In contrast,
262	/// anchoring a search will allow your regex to match anywhere in your
263	/// haystack, but the match must start at the beginning of a search.
264	///
265	/// For example, consider the haystack `aba` and the following searches:
266	///
267	/// 1. The regex `^a` is compiled with `Anchored::No` and searches `aba`
268	/// starting at position `2`. Since `^` requires the match to start at
269	/// the beginning of the haystack and `2 > 0`, no match is found.
270	/// 2. The regex `a` is compiled with `Anchored::Yes` and searches `aba`
271	/// starting at position `2`. This reports a match at `[2, 3]` since
272	/// the match starts where the search started. Since there is no `^`,
273	/// there is no requirement for the match to start at the beginning of
274	/// the haystack.
275	/// 3. The regex `a` is compiled with `Anchored::Yes` and searches `aba`
276	/// starting at position `1`. Since `b` corresponds to position `1` and
277	/// since the search is anchored, it finds no match. While the regex
278	/// matches at other positions, configuring the search to be anchored
279	/// requires that it only report a match that begins at the same offset
280	/// as the beginning of the search.
281	/// 4. The regex `a` is compiled with `Anchored::No` and searches `aba`
282	/// starting at position `1`. Since the search is not anchored and
283	/// the regex does not start with `^`, the search executes as if there
284	/// is a `(?s:.)?` prefix that permits it to match anywhere. Thus, it*
285	/// reports a match at `[2, 3]`.
286	///
287	/// Note that the [`Anchored::Pattern`] mode is like `Anchored::Yes`,
288	/// except it only reports matches for a particular pattern.
289	///
290	/// # Example
291	///
292	/// This demonstrates the differences between an anchored search and
293	/// a pattern that begins with `^` (as described in the above warning
294	/// message).
295	///
296	/// ```
297	/// use regex_automata::{
298	/// nfa::thompson::pikevm::PikeVM,
299	/// Anchored, Match, Input,
300	/// };
301	///
302	/// let haystack = "aba";
303	///
304	/// let re = PikeVM::new(r"^a")?;
305	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
306	/// let input = Input::new(haystack).span(`2`..`3`).anchored(Anchored::No);
307	/// re.search(&mut cache, &input, &mut caps);
308	/// // No match is found because 2 is not the beginning of the haystack,
309	/// // which is what ^ requires.
310	/// assert_eq!(None, caps.get_match());
311	///
312	/// let re = PikeVM::new(r"a")?;
313	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
314	/// let input = Input::new(haystack).span(`2`..`3`).anchored(Anchored::Yes);
315	/// re.search(&mut cache, &input, &mut caps);
316	/// // An anchored search can still match anywhere in the haystack, it just
317	/// // must begin at the start of the search which is '2' in this case.
318	/// assert_eq!(Some(Match::must(`0`, `2`..`3`)), caps.get_match());
319	///
320	/// let re = PikeVM::new(r"a")?;
321	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
322	/// let input = Input::new(haystack).span(`1`..`3`).anchored(Anchored::Yes);
323	/// re.search(&mut cache, &input, &mut caps);
324	/// // No match is found since we start searching at offset 1 which
325	/// // corresponds to 'b'. Since there is no '(?s:.)?' prefix, no match*
326	/// // is found.
327	/// assert_eq!(None, caps.get_match());
328	///
329	/// let re = PikeVM::new(r"a")?;
330	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
331	/// let input = Input::new(haystack).span(`1`..`3`).anchored(Anchored::No);
332	/// re.search(&mut cache, &input, &mut caps);
333	/// // Since anchored=no, an implicit '(?s:.)?' prefix was added to the*
334	/// // pattern. Even though the search starts at 'b', the 'match anything'
335	/// // prefix allows the search to match 'a'.
336	/// let expected = Some(Match::must(`0`, `2`..`3`));
337	/// assert_eq!(expected, caps.get_match());
338	///
339	/// # Ok::<(), Box<dyn std::error::Error>>(())
340	/// ```
341	#[inline]
342	pub fn anchored(mut self, mode: Anchored) -> Input<'h> {
343	self.set_anchored(mode);
344	self
345	}
346
347	/// Whether to execute an "earliest" search or not.
348	///
349	/// When running a non-overlapping search, an "earliest" search will return
350	/// the match location as early as possible. For example, given a pattern
351	/// of `foo[0-9]+` and a haystack of `foo12345`, a normal leftmost search
352	/// will return `foo12345` as a match. But an "earliest" search for regex
353	/// engines that support "earliest" semantics will return `foo1` as a
354	/// match, since as soon as the first digit following `foo` is seen, it is
355	/// known to have found a match.
356	///
357	/// Note that "earliest" semantics generally depend on the regex engine.
358	/// Different regex engines may determine there is a match at different
359	/// points. So there is no guarantee that "earliest" matches will always
360	/// return the same offsets for all regex engines. The "earliest" notion
361	/// is really about when the particular regex engine determines there is
362	/// a match rather than a consistent semantic unto itself. This is often
363	/// useful for implementing "did a match occur or not" predicates, but
364	/// sometimes the offset is useful as well.
365	///
366	/// This is disabled by default.
367	///
368	/// # Example
369	///
370	/// This example shows the difference between "earliest" searching and
371	/// normal searching.
372	///
373	/// ```
374	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match, Input};
375	///
376	/// let re = PikeVM::new(r"foo[0-9]+")?;
377	/// let mut cache = re.create_cache();
378	/// let mut caps = re.create_captures();
379	///
380	/// // A normal search implements greediness like you expect.
381	/// let input = Input::new("foo12345");
382	/// re.search(&mut cache, &input, &mut caps);
383	/// assert_eq!(Some(Match::must(`0`, `0`..`8`)), caps.get_match());
384	///
385	/// // When 'earliest' is enabled and the regex engine supports
386	/// // it, the search will bail once it knows a match has been
387	/// // found.
388	/// let input = Input::new("foo12345").earliest(`true`);
389	/// re.search(&mut cache, &input, &mut caps);
390	/// assert_eq!(Some(Match::must(`0`, `0`..`4`)), caps.get_match());
391	/// # Ok::<(), Box<dyn std::error::Error>>(())
392	/// ```
393	#[inline]
394	pub fn earliest(mut self, yes: bool) -> Input<'h> {
395	self.set_earliest(yes);
396	self
397	}
398
399	/// Set the span for this search configuration.
400	///
401	/// This is like the [`Input::span`] method, except this mutates the
402	/// span in place.
403	///
404	/// This routine is generic over how a span is provided. While
405	/// a [`Span`] may be given directly, one may also provide a
406	/// `std::ops::Range<usize>`.
407	///
408	/// # Panics
409	///
410	/// This panics if the given span does not correspond to valid bounds in
411	/// the haystack or the termination of a search.
412	///
413	/// # Example
414	///
415	/// ```
416	/// use regex_automata::Input;
417	///
418	/// let mut input = Input::new("foobar");
419	/// assert_eq!(`0`..`6`, input.get_range());
420	/// input.set_span(`2`..`4`);
421	/// assert_eq!(`2`..`4`, input.get_range());
422	/// ```
423	#[inline]
424	pub fn set_span<S: Into<Span>>(&mut self, span: S) {
425	let span = span.into();
426	assert!(
427	span.end <= self.haystack.len()
428	&& span.start <= span.end.wrapping_add(`1`),
429	"invalid span {:?} for haystack of length {}",
430	span,
431	self.haystack.len(),
432	);
433	self.span = span;
434	}
435
436	/// Set the span for this search configuration given any range.
437	///
438	/// This is like the [`Input::range`] method, except this mutates the
439	/// span in place.
440	///
441	/// This routine does not panic if the range given is not a valid range for
442	/// this search's haystack. If this search is run with an invalid range,
443	/// then the most likely outcome is that the actual search execution will
444	/// panic.
445	///
446	/// # Panics
447	///
448	/// This routine will panic if the given range could not be converted
449	/// to a valid [`Range`]. For example, this would panic when given
450	/// `0..=usize::MAX` since it cannot be represented using a half-open
451	/// interval in terms of `usize`.
452	///
453	/// This also panics if the given span does not correspond to valid bounds
454	/// in the haystack or the termination of a search.
455	///
456	/// # Example
457	///
458	/// ```
459	/// use regex_automata::Input;
460	///
461	/// let mut input = Input::new("foobar");
462	/// assert_eq!(`0`..`6`, input.get_range());
463	/// input.set_range(`2`..=`4`);
464	/// assert_eq!(`2`..`5`, input.get_range());
465	/// ```
466	#[inline]
467	pub fn set_range<R: RangeBounds<usize>>(&mut self, range: R) {
468	use core::ops::Bound;
469
470	// It's a little weird to convert ranges into spans, and then spans
471	// back into ranges when we actually slice the haystack. Because
472	// of that process, we always represent everything as a half-open
473	// internal. Therefore, handling things like m..=n is a little awkward.
474	let start = match range.start_bound() {
475	Bound::Included(&i) => i,
476	// Can this case ever happen? Range syntax doesn't support it...
477	Bound::Excluded(&i) => i.checked_add(`1`).unwrap(),
478	Bound::Unbounded => `0`,
479	};
480	let end = match range.end_bound() {
481	Bound::Included(&i) => i.checked_add(`1`).unwrap(),
482	Bound::Excluded(&i) => i,
483	Bound::Unbounded => self.haystack().len(),
484	};
485	self.set_span(Span { start, end });
486	}
487
488	/// Set the starting offset for the span for this search configuration.
489	///
490	/// This is a convenience routine for only mutating the start of a span
491	/// without having to set the entire span.
492	///
493	/// # Panics
494	///
495	/// This panics if the span resulting from the new start position does not
496	/// correspond to valid bounds in the haystack or the termination of a
497	/// search.
498	///
499	/// # Example
500	///
501	/// ```
502	/// use regex_automata::Input;
503	///
504	/// let mut input = Input::new("foobar");
505	/// assert_eq!(`0`..`6`, input.get_range());
506	/// input.set_start(`5`);
507	/// assert_eq!(`5`..`6`, input.get_range());
508	/// ```
509	#[inline]
510	pub fn set_start(&mut self, start: usize) {
511	self.set_span(Span { start, ..self.get_span() });
512	}
513
514	/// Set the ending offset for the span for this search configuration.
515	///
516	/// This is a convenience routine for only mutating the end of a span
517	/// without having to set the entire span.
518	///
519	/// # Panics
520	///
521	/// This panics if the span resulting from the new end position does not
522	/// correspond to valid bounds in the haystack or the termination of a
523	/// search.
524	///
525	/// # Example
526	///
527	/// ```
528	/// use regex_automata::Input;
529	///
530	/// let mut input = Input::new("foobar");
531	/// assert_eq!(`0`..`6`, input.get_range());
532	/// input.set_end(`5`);
533	/// assert_eq!(`0`..`5`, input.get_range());
534	/// ```
535	#[inline]
536	pub fn set_end(&mut self, end: usize) {
537	self.set_span(Span { end, ..self.get_span() });
538	}
539
540	/// Set the anchor mode of a search.
541	///
542	/// This is like [`Input::anchored`], except it mutates the search
543	/// configuration in place.
544	///
545	/// # Example
546	///
547	/// ```
548	/// use regex_automata::{Anchored, Input, PatternID};
549	///
550	/// let mut input = Input::new("foobar");
551	/// assert_eq!(Anchored::No, input.get_anchored());
552	///
553	/// let pid = PatternID::must(`5`);
554	/// input.set_anchored(Anchored::Pattern(pid));
555	/// assert_eq!(Anchored::Pattern(pid), input.get_anchored());
556	/// ```
557	#[inline]
558	pub fn set_anchored(&mut self, mode: Anchored) {
559	self.anchored = mode;
560	}
561
562	/// Set whether the search should execute in "earliest" mode or not.
563	///
564	/// This is like [`Input::earliest`], except it mutates the search
565	/// configuration in place.
566	///
567	/// # Example
568	///
569	/// ```
570	/// use regex_automata::Input;
571	///
572	/// let mut input = Input::new("foobar");
573	/// assert!(!input.get_earliest());
574	/// input.set_earliest(`true`);
575	/// assert!(input.get_earliest());
576	/// ```
577	#[inline]
578	pub fn set_earliest(&mut self, yes: bool) {
579	self.earliest = yes;
580	}
581
582	/// Return a borrow of the underlying haystack as a slice of bytes.
583	///
584	/// # Example
585	///
586	/// ```
587	/// use regex_automata::Input;
588	///
589	/// let input = Input::new("foobar");
590	/// assert_eq!(b"foobar", input.haystack());
591	/// ```
592	#[inline]
593	pub fn haystack(&self) -> &[u8] {
594	self.haystack
595	}
596
597	/// Return the start position of this search.
598	///
599	/// This is a convenience routine for `search.get_span().start()`.
600	///
601	/// When [`Input::is_done`] is `false`, this is guaranteed to return
602	/// an offset that is less than or equal to [`Input::end`]. Otherwise,
603	/// the offset is one greater than [`Input::end`].
604	///
605	/// # Example
606	///
607	/// ```
608	/// use regex_automata::Input;
609	///
610	/// let input = Input::new("foobar");
611	/// assert_eq!(`0`, input.start());
612	///
613	/// let input = Input::new("foobar").span(`2`..`4`);
614	/// assert_eq!(`2`, input.start());
615	/// ```
616	#[inline]
617	pub fn start(&self) -> usize {
618	self.get_span().start
619	}
620
621	/// Return the end position of this search.
622	///
623	/// This is a convenience routine for `search.get_span().end()`.
624	///
625	/// This is guaranteed to return an offset that is a valid exclusive end
626	/// bound for this input's haystack.
627	///
628	/// # Example
629	///
630	/// ```
631	/// use regex_automata::Input;
632	///
633	/// let input = Input::new("foobar");
634	/// assert_eq!(`6`, input.end());
635	///
636	/// let input = Input::new("foobar").span(`2`..`4`);
637	/// assert_eq!(`4`, input.end());
638	/// ```
639	#[inline]
640	pub fn end(&self) -> usize {
641	self.get_span().end
642	}
643
644	/// Return the span for this search configuration.
645	///
646	/// If one was not explicitly set, then the span corresponds to the entire
647	/// range of the haystack.
648	///
649	/// When [`Input::is_done`] is `false`, the span returned is guaranteed
650	/// to correspond to valid bounds for this input's haystack.
651	///
652	/// # Example
653	///
654	/// ```
655	/// use regex_automata::{Input, Span};
656	///
657	/// let input = Input::new("foobar");
658	/// assert_eq!(Span { start: `0`, end: `6` }, input.get_span());
659	/// ```
660	#[inline]
661	pub fn get_span(&self) -> Span {
662	self.span
663	}
664
665	/// Return the span as a range for this search configuration.
666	///
667	/// If one was not explicitly set, then the span corresponds to the entire
668	/// range of the haystack.
669	///
670	/// When [`Input::is_done`] is `false`, the range returned is guaranteed
671	/// to correspond to valid bounds for this input's haystack.
672	///
673	/// # Example
674	///
675	/// ```
676	/// use regex_automata::Input;
677	///
678	/// let input = Input::new("foobar");
679	/// assert_eq!(`0`..`6`, input.get_range());
680	/// ```
681	#[inline]
682	pub fn get_range(&self) -> Range<usize> {
683	self.get_span().range()
684	}
685
686	/// Return the anchored mode for this search configuration.
687	///
688	/// If no anchored mode was set, then it defaults to [`Anchored::No`].
689	///
690	/// # Example
691	///
692	/// ```
693	/// use regex_automata::{Anchored, Input, PatternID};
694	///
695	/// let mut input = Input::new("foobar");
696	/// assert_eq!(Anchored::No, input.get_anchored());
697	///
698	/// let pid = PatternID::must(`5`);
699	/// input.set_anchored(Anchored::Pattern(pid));
700	/// assert_eq!(Anchored::Pattern(pid), input.get_anchored());
701	/// ```
702	#[inline]
703	pub fn get_anchored(&self) -> Anchored {
704	self.anchored
705	}
706
707	/// Return whether this search should execute in "earliest" mode.
708	///
709	/// # Example
710	///
711	/// ```
712	/// use regex_automata::Input;
713	///
714	/// let input = Input::new("foobar");
715	/// assert!(!input.get_earliest());
716	/// ```
717	#[inline]
718	pub fn get_earliest(&self) -> bool {
719	self.earliest
720	}
721
722	/// Return true if and only if this search can never return any other
723	/// matches.
724	///
725	/// This occurs when the start position of this search is greater than the
726	/// end position of the search.
727	///
728	/// # Example
729	///
730	/// ```
731	/// use regex_automata::Input;
732	///
733	/// let mut input = Input::new("foobar");
734	/// assert!(!input.is_done());
735	/// input.set_start(`6`);
736	/// assert!(!input.is_done());
737	/// input.set_start(`7`);
738	/// assert!(input.is_done());
739	/// ```
740	#[inline]
741	pub fn is_done(&self) -> bool {
742	self.get_span().start > self.get_span().end
743	}
744
745	/// Returns true if and only if the given offset in this search's haystack
746	/// falls on a valid UTF-8 encoded codepoint boundary.
747	///
748	/// If the haystack is not valid UTF-8, then the behavior of this routine
749	/// is unspecified.
750	///
751	/// # Example
752	///
753	/// This shows where codepoint boundaries do and don't exist in valid
754	/// UTF-8.
755	///
756	/// ```
757	/// use regex_automata::Input;
758	///
759	/// let input = Input::new("☃");
760	/// assert!(input.is_char_boundary(`0`));
761	/// assert!(!input.is_char_boundary(`1`));
762	/// assert!(!input.is_char_boundary(`2`));
763	/// assert!(input.is_char_boundary(`3`));
764	/// assert!(!input.is_char_boundary(`4`));
765	/// ```
766	#[inline]
767	pub fn is_char_boundary(&self, offset: usize) -> bool {
768	utf8::is_boundary(self.haystack(), offset)
769	}
770	}
771
772	impl<'h> core::fmt::Debug for Input<'h> {
773	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
774	use crate::util::escape::DebugHaystack;
775
776	f&mut DebugStruct<'_, '_>.debug_struct("Input")
777	.field("haystack", &DebugHaystack(self.haystack()))
778	.field("span", &self.span)
779	.field("anchored", &self.anchored)
780	.field(name:"earliest", &self.earliest)
781	.finish()
782	}
783	}
784
785	impl<'h, H: ?Sized + AsRef<[u8]>> From<&'h H> for Input<'h> {
786	fn from(haystack: &'h H) -> Input<'h> {
787	Input::new(haystack)
788	}
789	}
790
791	/// A representation of a span reported by a regex engine.
792	///
793	/// A span corresponds to the starting and ending _byte offsets_ of a
794	/// contiguous region of bytes. The starting offset is inclusive while the
795	/// ending offset is exclusive. That is, a span is a half-open interval.
796	///
797	/// A span is used to report the offsets of a match, but it is also used to
798	/// convey which region of a haystack should be searched via routines like
799	/// [`Input::span`].
800	///
801	/// This is basically equivalent to a `std::ops::Range<usize>`, except this
802	/// type implements `Copy` which makes it more ergonomic to use in the context
803	/// of this crate. Like a range, this implements `Index` for `[u8]` and `str`,
804	/// and `IndexMut` for `[u8]`. For convenience, this also impls `From<Range>`,
805	/// which means things like `Span::from(5..10)` work.
806	#[derive(Clone, Copy, Eq, Hash, PartialEq)]
807	pub struct Span {
808	/// The start offset of the span, inclusive.
809	pub start: usize,
810	/// The end offset of the span, exclusive.
811	pub end: usize,
812	}
813
814	impl Span {
815	/// Returns this span as a range.
816	#[inline]
817	pub fn range(&self) -> Range<usize> {
818	Range::from(*self)
819	}
820
821	/// Returns true when this span is empty. That is, when `start >= end`.
822	#[inline]
823	pub fn is_empty(&self) -> bool {
824	self.start >= self.end
825	}
826
827	/// Returns the length of this span.
828	///
829	/// This returns `0` in precisely the cases that `is_empty` returns `true`.
830	#[inline]
831	pub fn len(&self) -> usize {
832	self.end.saturating_sub(self.start)
833	}
834
835	/// Returns true when the given offset is contained within this span.
836	///
837	/// Note that an empty span contains no offsets and will always return
838	/// false.
839	#[inline]
840	pub fn contains(&self, offset: usize) -> bool {
841	!self.is_empty() && self.start <= offset && offset <= self.end
842	}
843
844	/// Returns a new span with `offset` added to this span's `start` and `end`
845	/// values.
846	#[inline]
847	pub fn offset(&self, offset: usize) -> Span {
848	Span { start: self.start + offset, end: self.end + offset }
849	}
850	}
851
852	impl core::fmt::Debug for Span {
853	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
854	write!(f, "{}..{}", self.start, self.end)
855	}
856	}
857
858	impl core::ops::Index<Span> for [u8] {
859	type Output = [u8];
860
861	#[inline]
862	fn index(&self, index: Span) -> &[u8] {
863	&self[index.range()]
864	}
865	}
866
867	impl core::ops::IndexMut<Span> for [u8] {
868	#[inline]
869	fn index_mut(&mut self, index: Span) -> &mut [u8] {
870	&mut self[index.range()]
871	}
872	}
873
874	impl core::ops::Index<Span> for str {
875	type Output = str;
876
877	#[inline]
878	fn index(&self, index: Span) -> &str {
879	&self[index.range()]
880	}
881	}
882
883	impl From<Range<usize>> for Span {
884	#[inline]
885	fn from(range: Range<usize>) -> Span {
886	Span { start: range.start, end: range.end }
887	}
888	}
889
890	impl From<Span> for Range<usize> {
891	#[inline]
892	fn from(span: Span) -> Range<usize> {
893	Range { start: span.start, end: span.end }
894	}
895	}
896
897	impl PartialEq<Range<usize>> for Span {
898	#[inline]
899	fn eq(&self, range: &Range<usize>) -> bool {
900	self.start == range.start && self.end == range.end
901	}
902	}
903
904	impl PartialEq<Span> for Range<usize> {
905	#[inline]
906	fn eq(&self, span: &Span) -> bool {
907	self.start == span.start && self.end == span.end
908	}
909	}
910
911	/// A representation of "half" of a match reported by a DFA.
912	///
913	/// This is called a "half" match because it only includes the end location (or
914	/// start location for a reverse search) of a match. This corresponds to the
915	/// information that a single DFA scan can report. Getting the other half of
916	/// the match requires a second scan with a reversed DFA.
917	///
918	/// A half match also includes the pattern that matched. The pattern is
919	/// identified by an ID, which corresponds to its position (starting from `0`)
920	/// relative to other patterns used to construct the corresponding DFA. If only
921	/// a single pattern is provided to the DFA, then all matches are guaranteed to
922	/// have a pattern ID of `0`.
923	#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
924	pub struct HalfMatch {
925	/// The pattern ID.
926	pattern: PatternID,
927	/// The offset of the match.
928	///
929	/// For forward searches, the offset is exclusive. For reverse searches,
930	/// the offset is inclusive.
931	offset: usize,
932	}
933
934	impl HalfMatch {
935	/// Create a new half match from a pattern ID and a byte offset.
936	#[inline]
937	pub fn new(pattern: PatternID, offset: usize) -> HalfMatch {
938	HalfMatch { pattern, offset }
939	}
940
941	/// Create a new half match from a pattern ID and a byte offset.
942	///
943	/// This is like [`HalfMatch::new`], but accepts a `usize` instead of a
944	/// [`PatternID`]. This panics if the given `usize` is not representable
945	/// as a `PatternID`.
946	#[inline]
947	pub fn must(pattern: usize, offset: usize) -> HalfMatch {
948	HalfMatch::new(PatternID::new(pattern).unwrap(), offset)
949	}
950
951	/// Returns the ID of the pattern that matched.
952	///
953	/// The ID of a pattern is derived from the position in which it was
954	/// originally inserted into the corresponding DFA. The first pattern has
955	/// identifier `0`, and each subsequent pattern is `1`, `2` and so on.
956	#[inline]
957	pub fn pattern(&self) -> PatternID {
958	self.pattern
959	}
960
961	/// The position of the match.
962	///
963	/// If this match was produced by a forward search, then the offset is
964	/// exclusive. If this match was produced by a reverse search, then the
965	/// offset is inclusive.
966	#[inline]
967	pub fn offset(&self) -> usize {
968	self.offset
969	}
970	}
971
972	/// A representation of a match reported by a regex engine.
973	///
974	/// A match has two essential pieces of information: the [`PatternID`] that
975	/// matches, and the [`Span`] of the match in a haystack.
976	///
977	/// The pattern is identified by an ID, which corresponds to its position
978	/// (starting from `0`) relative to other patterns used to construct the
979	/// corresponding regex engine. If only a single pattern is provided, then all
980	/// matches are guaranteed to have a pattern ID of `0`.
981	///
982	/// Every match reported by a regex engine guarantees that its span has its
983	/// start offset as less than or equal to its end offset.
984	#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
985	pub struct Match {
986	/// The pattern ID.
987	pattern: PatternID,
988	/// The underlying match span.
989	span: Span,
990	}
991
992	impl Match {
993	/// Create a new match from a pattern ID and a span.
994	///
995	/// This constructor is generic over how a span is provided. While
996	/// a [`Span`] may be given directly, one may also provide a
997	/// `std::ops::Range<usize>`.
998	///
999	/// # Panics
1000	///
1001	/// This panics if `end < start`.
1002	///
1003	/// # Example
1004	///
1005	/// This shows how to create a match for the first pattern in a regex
1006	/// object using convenient range syntax.
1007	///
1008	/// ```
1009	/// use regex_automata::{Match, PatternID};
1010	///
1011	/// let m = Match::new(PatternID::ZERO, `5`..`10`);
1012	/// assert_eq!(`0`, m.pattern().as_usize());
1013	/// assert_eq!(`5`, m.start());
1014	/// assert_eq!(`10`, m.end());
1015	/// ```
1016	#[inline]
1017	pub fn new<S: Into<Span>>(pattern: PatternID, span: S) -> Match {
1018	let span: Span = span.into();
1019	assert!(span.start <= span.end, "invalid match span");
1020	Match { pattern, span }
1021	}
1022
1023	/// Create a new match from a pattern ID and a byte offset span.
1024	///
1025	/// This constructor is generic over how a span is provided. While
1026	/// a [`Span`] may be given directly, one may also provide a
1027	/// `std::ops::Range<usize>`.
1028	///
1029	/// This is like [`Match::new`], but accepts a `usize` instead of a
1030	/// [`PatternID`]. This panics if the given `usize` is not representable
1031	/// as a `PatternID`.
1032	///
1033	/// # Panics
1034	///
1035	/// This panics if `end < start` or if `pattern > PatternID::MAX`.
1036	///
1037	/// # Example
1038	///
1039	/// This shows how to create a match for the third pattern in a regex
1040	/// object using convenient range syntax.
1041	///
1042	/// ```
1043	/// use regex_automata::Match;
1044	///
1045	/// let m = Match::must(`3`, `5`..`10`);
1046	/// assert_eq!(`3`, m.pattern().as_usize());
1047	/// assert_eq!(`5`, m.start());
1048	/// assert_eq!(`10`, m.end());
1049	/// ```
1050	#[inline]
1051	pub fn must<S: Into<Span>>(pattern: usize, span: S) -> Match {
1052	Match::new(PatternID::must(pattern), span)
1053	}
1054
1055	/// Returns the ID of the pattern that matched.
1056	///
1057	/// The ID of a pattern is derived from the position in which it was
1058	/// originally inserted into the corresponding regex engine. The first
1059	/// pattern has identifier `0`, and each subsequent pattern is `1`, `2` and
1060	/// so on.
1061	#[inline]
1062	pub fn pattern(&self) -> PatternID {
1063	self.pattern
1064	}
1065
1066	/// The starting position of the match.
1067	///
1068	/// This is a convenience routine for `Match::span().start`.
1069	#[inline]
1070	pub fn start(&self) -> usize {
1071	self.span().start
1072	}
1073
1074	/// The ending position of the match.
1075	///
1076	/// This is a convenience routine for `Match::span().end`.
1077	#[inline]
1078	pub fn end(&self) -> usize {
1079	self.span().end
1080	}
1081
1082	/// Returns the match span as a range.
1083	///
1084	/// This is a convenience routine for `Match::span().range()`.
1085	#[inline]
1086	pub fn range(&self) -> core::ops::Range<usize> {
1087	self.span().range()
1088	}
1089
1090	/// Returns the span for this match.
1091	#[inline]
1092	pub fn span(&self) -> Span {
1093	self.span
1094	}
1095
1096	/// Returns true when the span in this match is empty.
1097	///
1098	/// An empty match can only be returned when the regex itself can match
1099	/// the empty string.
1100	#[inline]
1101	pub fn is_empty(&self) -> bool {
1102	self.span().is_empty()
1103	}
1104
1105	/// Returns the length of this match.
1106	///
1107	/// This returns `0` in precisely the cases that `is_empty` returns `true`.
1108	#[inline]
1109	pub fn len(&self) -> usize {
1110	self.span().len()
1111	}
1112	}
1113
1114	/// A set of `PatternID`s.
1115	///
1116	/// A set of pattern identifiers is useful for recording which patterns have
1117	/// matched a particular haystack. A pattern set _only_ includes pattern
1118	/// identifiers. It does not include offset information.
1119	///
1120	/// # Example
1121	///
1122	/// This shows basic usage of a set.
1123	///
1124	/// ```
1125	/// use regex_automata::{PatternID, PatternSet};
1126	///
1127	/// let pid1 = PatternID::must(`5`);
1128	/// let pid2 = PatternID::must(`8`);
1129	/// // Create a new empty set.
1130	/// let mut set = PatternSet::new(`10`);
1131	/// // Insert pattern IDs.
1132	/// set.insert(pid1);
1133	/// set.insert(pid2);
1134	/// // Test membership.
1135	/// assert!(set.contains(pid1));
1136	/// assert!(set.contains(pid2));
1137	/// // Get all members.
1138	/// assert_eq!(
1139	/// vec![`5`, `8`],
1140	/// set.iter().map(\|p\| p.as_usize()).collect::<Vec<usize>>(),
1141	/// );
1142	/// // Clear the set.
1143	/// set.clear();
1144	/// // Test that it is indeed empty.
1145	/// assert!(set.is_empty());
1146	/// ```
1147	#[cfg(feature = "alloc")]
1148	#[derive(Clone, Debug, Eq, PartialEq)]
1149	pub struct PatternSet {
1150	/// The number of patterns set to 'true' in this set.
1151	len: usize,
1152	/// A map from PatternID to boolean of whether a pattern matches or not.
1153	///
1154	/// This should probably be a bitset, but it's probably unlikely to matter
1155	/// much in practice.
1156	///
1157	/// The main downside of this representation (and similarly for a bitset)
1158	/// is that iteration scales with the capacity of the set instead of
1159	/// the length of the set. This doesn't seem likely to be a problem in
1160	/// practice.
1161	///
1162	/// Another alternative is to just use a 'SparseSet' for this. It does use
1163	/// more memory (quite a bit more), but that seems fine I think compared
1164	/// to the memory being used by the regex engine. The real hiccup with
1165	/// it is that it yields pattern IDs in the order they were inserted.
1166	/// Which is actually kind of nice, but at the time of writing, pattern
1167	/// IDs are yielded in ascending order in the regex crate RegexSet API.
1168	/// If we did change to 'SparseSet', we could provide an additional
1169	/// 'iter_match_order' iterator, but keep the ascending order one for
1170	/// compatibility.
1171	which: alloc::boxed::Box<[bool]>,
1172	}
1173
1174	#[cfg(feature = "alloc")]
1175	impl PatternSet {
1176	/// Create a new set of pattern identifiers with the given capacity.
1177	///
1178	/// The given capacity typically corresponds to (at least) the number of
1179	/// patterns in a compiled regex object.
1180	///
1181	/// # Panics
1182	///
1183	/// This panics if the given capacity exceeds [`PatternID::LIMIT`]. This is
1184	/// impossible if you use the `pattern_len()` method as defined on any of
1185	/// the regex engines in this crate. Namely, a regex will fail to build by
1186	/// returning an error if the number of patterns given to it exceeds the
1187	/// limit. Therefore, the number of patterns in a valid regex is always
1188	/// a correct capacity to provide here.
1189	pub fn new(capacity: usize) -> PatternSet {
1190	assert!(
1191	capacity <= PatternID::LIMIT,
1192	"pattern set capacity exceeds limit of {}",
1193	PatternID::LIMIT,
1194	);
1195	PatternSet {
1196	len: `0`,
1197	which: alloc::vec![`false`; capacity].into_boxed_slice(),
1198	}
1199	}
1200
1201	/// Clear this set such that it contains no pattern IDs.
1202	pub fn clear(&mut self) {
1203	self.len = `0`;
1204	for matched in self.which.iter_mut() {
1205	*matched = `false`;
1206	}
1207	}
1208
1209	/// Return true if and only if the given pattern identifier is in this set.
1210	pub fn contains(&self, pid: PatternID) -> bool {
1211	pid.as_usize() < self.capacity() && self.which[pid]
1212	}
1213
1214	/// Insert the given pattern identifier into this set and return `true` if
1215	/// the given pattern ID was not previously in this set.
1216	///
1217	/// If the pattern identifier is already in this set, then this is a no-op.
1218	///
1219	/// Use [`PatternSet::try_insert`] for a fallible version of this routine.
1220	///
1221	/// # Panics
1222	///
1223	/// This panics if this pattern set has insufficient capacity to
1224	/// store the given pattern ID.
1225	pub fn insert(&mut self, pid: PatternID) -> bool {
1226	self.try_insert(pid)
1227	.expect("PatternSet should have sufficient capacity")
1228	}
1229
1230	/// Insert the given pattern identifier into this set and return `true` if
1231	/// the given pattern ID was not previously in this set.
1232	///
1233	/// If the pattern identifier is already in this set, then this is a no-op.
1234	///
1235	/// # Errors
1236	///
1237	/// This returns an error if this pattern set has insufficient capacity to
1238	/// store the given pattern ID.
1239	pub fn try_insert(
1240	&mut self,
1241	pid: PatternID,
1242	) -> Result<bool, PatternSetInsertError> {
1243	if pid.as_usize() >= self.capacity() {
1244	return Err(PatternSetInsertError {
1245	attempted: pid,
1246	capacity: self.capacity(),
1247	});
1248	}
1249	if self.which[pid] {
1250	return Ok(`false`);
1251	}
1252	self.len += `1`;
1253	self.which[pid] = `true`;
1254	Ok(`true`)
1255	}
1256
1257	/*
1258	// This is currently commented out because it is unused and it is unclear
1259	// whether it's useful or not. What's the harm in having it? When, if
1260	// we ever wanted to change our representation to a 'SparseSet', then
1261	// supporting this method would be a bit tricky. So in order to keep some
1262	// API evolution flexibility, we leave it out for now.
1263
1264	/// Remove the given pattern identifier from this set.
1265	///
1266	/// If the pattern identifier was not previously in this set, then this
1267	/// does not change the set and returns `false`.
1268	///
1269	/// # Panics
1270	///
1271	/// This panics if `pid` exceeds the capacity of this set.
1272	pub fn remove(&mut self, pid: PatternID) -> bool {
1273	if !self.which[pid] {
1274	return false;
1275	}
1276	self.len -= 1;
1277	self.which[pid] = false;
1278	true
1279	}
1280	*/
1281
1282	/// Return true if and only if this set has no pattern identifiers in it.
1283	pub fn is_empty(&self) -> bool {
1284	self.len() == `0`
1285	}
1286
1287	/// Return true if and only if this set has the maximum number of pattern
1288	/// identifiers in the set. This occurs precisely when `PatternSet::len()
1289	/// == PatternSet::capacity()`.
1290	///
1291	/// This particular property is useful to test because it may allow one to
1292	/// stop a search earlier than you might otherwise. Namely, if a search is
1293	/// only reporting which patterns match a haystack and if you know all of
1294	/// the patterns match at a given point, then there's no new information
1295	/// that can be learned by continuing the search. (Because a pattern set
1296	/// does not keep track of offset information.)
1297	pub fn is_full(&self) -> bool {
1298	self.len() == self.capacity()
1299	}
1300
1301	/// Returns the total number of pattern identifiers in this set.
1302	pub fn len(&self) -> usize {
1303	self.len
1304	}
1305
1306	/// Returns the total number of pattern identifiers that may be stored
1307	/// in this set.
1308	///
1309	/// This is guaranteed to be less than or equal to [`PatternID::LIMIT`].
1310	///
1311	/// Typically, the capacity of a pattern set matches the number of patterns
1312	/// in a regex object with which you are searching.
1313	pub fn capacity(&self) -> usize {
1314	self.which.len()
1315	}
1316
1317	/// Returns an iterator over all pattern identifiers in this set.
1318	///
1319	/// The iterator yields pattern identifiers in ascending order, starting
1320	/// at zero.
1321	pub fn iter(&self) -> PatternSetIter<'_> {
1322	PatternSetIter { it: self.which.iter().enumerate() }
1323	}
1324	}
1325
1326	/// An error that occurs when a `PatternID` failed to insert into a
1327	/// `PatternSet`.
1328	///
1329	/// An insert fails when the given `PatternID` exceeds the configured capacity
1330	/// of the `PatternSet`.
1331	///
1332	/// This error is created by the [`PatternSet::try_insert`] routine.
1333	#[cfg(feature = "alloc")]
1334	#[derive(Clone, Debug)]
1335	pub struct PatternSetInsertError {
1336	attempted: PatternID,
1337	capacity: usize,
1338	}
1339
1340	#[cfg(feature = "std")]
1341	impl std::error::Error for PatternSetInsertError {}
1342
1343	#[cfg(feature = "alloc")]
1344	impl core::fmt::Display for PatternSetInsertError {
1345	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1346	write!(
1347	f,
1348	"failed to insert pattern ID {} into pattern set \
1349	with insufficiet capacity of {}",
1350	self.attempted.as_usize(),
1351	self.capacity,
1352	)
1353	}
1354	}
1355
1356	/// An iterator over all pattern identifiers in a [`PatternSet`].
1357	///
1358	/// The lifetime parameter `'a` refers to the lifetime of the pattern set being
1359	/// iterated over.
1360	///
1361	/// This iterator is created by the [`PatternSet::iter`] method.
1362	#[cfg(feature = "alloc")]
1363	#[derive(Clone, Debug)]
1364	pub struct PatternSetIter<'a> {
1365	it: core::iter::Enumerate<core::slice::Iter<'a, bool>>,
1366	}
1367
1368	#[cfg(feature = "alloc")]
1369	impl<'a> Iterator for PatternSetIter<'a> {
1370	type Item = PatternID;
1371
1372	fn next(&mut self) -> Option<PatternID> {
1373	while let Some((index: usize, &yes: bool)) = self.it.next() {
1374	if yes {
1375	// Only valid 'PatternID' values can be inserted into the set
1376	// and construction of the set panics if the capacity would
1377	// permit storing invalid pattern IDs. Thus, 'yes' is only true
1378	// precisely when 'index' corresponds to a valid 'PatternID'.
1379	return Some(PatternID::new_unchecked(index));
1380	}
1381	}
1382	None
1383	}
1384
1385	fn size_hint(&self) -> (usize, Option<usize>) {
1386	self.it.size_hint()
1387	}
1388	}
1389
1390	#[cfg(feature = "alloc")]
1391	impl<'a> DoubleEndedIterator for PatternSetIter<'a> {
1392	fn next_back(&mut self) -> Option<PatternID> {
1393	while let Some((index: usize, &yes: bool)) = self.it.next_back() {
1394	if yes {
1395	// Only valid 'PatternID' values can be inserted into the set
1396	// and construction of the set panics if the capacity would
1397	// permit storing invalid pattern IDs. Thus, 'yes' is only true
1398	// precisely when 'index' corresponds to a valid 'PatternID'.
1399	return Some(PatternID::new_unchecked(index));
1400	}
1401	}
1402	None
1403	}
1404	}
1405
1406	/// The type of anchored search to perform.
1407	///
1408	/// This is almost* a boolean option. That is, you can either do an unanchored*
1409	/// search for any pattern in a regex, or you can do an anchored search for any
1410	/// pattern in a regex.
1411	///
1412	/// A third option exists that, assuming the regex engine supports it, permits
1413	/// you to do an anchored search for a specific pattern.
1414	///
1415	/// Note that there is no way to run an unanchored search for a specific
1416	/// pattern. If you need that, you'll need to build separate regexes for each
1417	/// pattern.
1418	///
1419	/// # Errors
1420	///
1421	/// If a regex engine does not support the anchored mode selected, then the
1422	/// regex engine will return an error. While any non-trivial regex engine
1423	/// should support at least one of the available anchored modes, there is no
1424	/// singular mode that is guaranteed to be universally supported. Some regex
1425	/// engines might only support unanchored searches (DFAs compiled without
1426	/// anchored starting states) and some regex engines might only support
1427	/// anchored searches (like the one-pass DFA).
1428	///
1429	/// The specific error returned is a [`MatchError`] with a
1430	/// [`MatchErrorKind::UnsupportedAnchored`] kind. The kind includes the
1431	/// `Anchored` value given that is unsupported.
1432	///
1433	/// Note that regex engines should report "no match" if, for example, an
1434	/// `Anchored::Pattern` is provided with an invalid pattern ID _but_ where
1435	/// anchored searches for a specific pattern are supported. This is smooths out
1436	/// behavior such that it's possible to guarantee that an error never occurs
1437	/// based on how the regex engine is configured. All regex engines in this
1438	/// crate report "no match" when searching for an invalid pattern ID, but where
1439	/// searching for a valid pattern ID is otherwise supported.
1440	///
1441	/// # Example
1442	///
1443	/// This example shows how to use the various `Anchored` modes to run a
1444	/// search. We use the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM)
1445	/// because it supports all modes unconditionally. Some regex engines, like
1446	/// the [`onepass::DFA`](crate::dfa::onepass::DFA) cannot support unanchored
1447	/// searches.
1448	///
1449	/// ```
1450	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
1451	/// use regex_automata::{
1452	/// nfa::thompson::pikevm::PikeVM,
1453	/// Anchored, Input, Match, PatternID,
1454	/// };
1455	///
1456	/// let re = PikeVM::new_many(&[
1457	/// r"Mrs. \w+",
1458	/// r"Miss \w+",
1459	/// r"Mr. \w+",
1460	/// r"Ms. \w+",
1461	/// ])?;
1462	/// let mut cache = re.create_cache();
1463	/// let hay = "Hello Mr. Springsteen!";
1464	///
1465	/// // The default is to do an unanchored search.
1466	/// assert_eq!(Some(Match::must(`2`, `6`..`21`)), re.find(&mut cache, hay));
1467	/// // Explicitly ask for an unanchored search. Same as above.
1468	/// let input = Input::new(hay).anchored(Anchored::No);
1469	/// assert_eq!(Some(Match::must(`2`, `6`..`21`)), re.find(&mut cache, hay));
1470	///
1471	/// // Now try an anchored search. Since the match doesn't start at the
1472	/// // beginning of the haystack, no match is found!
1473	/// let input = Input::new(hay).anchored(Anchored::Yes);
1474	/// assert_eq!(None, re.find(&mut cache, input));
1475	///
1476	/// // We can try an anchored search again, but move the location of where
1477	/// // we start the search. Note that the offsets reported are still in
1478	/// // terms of the overall haystack and not relative to where we started
1479	/// // the search.
1480	/// let input = Input::new(hay).anchored(Anchored::Yes).range(`6`..);
1481	/// assert_eq!(Some(Match::must(`2`, `6`..`21`)), re.find(&mut cache, input));
1482	///
1483	/// // Now try an anchored search for a specific pattern. We specifically
1484	/// // choose a pattern that we know doesn't match to prove that the search
1485	/// // only looks for the pattern we provide.
1486	/// let input = Input::new(hay)
1487	/// .anchored(Anchored::Pattern(PatternID::must(`1`)))
1488	/// .range(`6`..);
1489	/// assert_eq!(None, re.find(&mut cache, input));
1490	///
1491	/// // But if we switch it to the pattern that we know matches, then we find
1492	/// // the match.
1493	/// let input = Input::new(hay)
1494	/// .anchored(Anchored::Pattern(PatternID::must(`2`)))
1495	/// .range(`6`..);
1496	/// assert_eq!(Some(Match::must(`2`, `6`..`21`)), re.find(&mut cache, input));
1497	///
1498	/// # Ok::<(), Box<dyn std::error::Error>>(())
1499	/// ```
1500	#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1501	pub enum Anchored {
1502	/// Run an unanchored search. This means a match may occur anywhere at or
1503	/// after the start position of the search.
1504	///
1505	/// This search can return a match for any pattern in the regex.
1506	No,
1507	/// Run an anchored search. This means that a match must begin at the
1508	/// start position of the search.
1509	///
1510	/// This search can return a match for any pattern in the regex.
1511	Yes,
1512	/// Run an anchored search for a specific pattern. This means that a match
1513	/// must be for the given pattern and must begin at the start position of
1514	/// the search.
1515	Pattern(PatternID),
1516	}
1517
1518	impl Anchored {
1519	/// Returns true if and only if this anchor mode corresponds to any kind of
1520	/// anchored search.
1521	///
1522	/// # Example
1523	///
1524	/// This examples shows that both `Anchored::Yes` and `Anchored::Pattern`
1525	/// are considered anchored searches.
1526	///
1527	/// ```
1528	/// use regex_automata::{Anchored, PatternID};
1529	///
1530	/// assert!(!Anchored::No.is_anchored());
1531	/// assert!(Anchored::Yes.is_anchored());
1532	/// assert!(Anchored::Pattern(PatternID::ZERO).is_anchored());
1533	/// ```
1534	#[inline]
1535	pub fn is_anchored(&self) -> bool {
1536	matches!(*self, Anchored::Yes \| Anchored::Pattern(_))
1537	}
1538
1539	/// Returns the pattern ID associated with this configuration if it is an
1540	/// anchored search for a specific pattern. Otherwise `None` is returned.
1541	///
1542	/// # Example
1543	///
1544	/// ```
1545	/// use regex_automata::{Anchored, PatternID};
1546	///
1547	/// assert_eq!(None, Anchored::No.pattern());
1548	/// assert_eq!(None, Anchored::Yes.pattern());
1549	///
1550	/// let pid = PatternID::must(`5`);
1551	/// assert_eq!(Some(pid), Anchored::Pattern(pid).pattern());
1552	/// ```
1553	#[inline]
1554	pub fn pattern(&self) -> Option<PatternID> {
1555	match *self {
1556	Anchored::Pattern(pid) => Some(pid),
1557	_ => None,
1558	}
1559	}
1560	}
1561
1562	/// The kind of match semantics to use for a regex pattern.
1563	///
1564	/// The default match kind is `LeftmostFirst`, and this corresponds to the
1565	/// match semantics used by most backtracking engines, such as Perl.
1566	///
1567	/// # Leftmost first or "preference order" match semantics
1568	///
1569	/// Leftmost-first semantics determine which match to report when there are
1570	/// multiple paths through a regex that match at the same position. The tie is
1571	/// essentially broken by how a backtracker would behave. For example, consider
1572	/// running the regex `foofoofoo\|foofoo\|foo` on the haystack `foofoo`. In this
1573	/// case, both the `foofoo` and `foo` branches match at position `0`. So should
1574	/// the end of the match be `3` or `6`?
1575	///
1576	/// A backtracker will conceptually work by trying `foofoofoo` and failing.
1577	/// Then it will try `foofoo`, find the match and stop there. Thus, the
1578	/// leftmost-first match position is `6`. This is called "leftmost-first" or
1579	/// "preference order" because the order of the branches as written in the
1580	/// regex pattern is what determines how to break the tie.
1581	///
1582	/// (Note that leftmost-longest match semantics, which break ties by always
1583	/// taking the longest matching string, are not currently supported by this
1584	/// crate. These match semantics tend to be found in POSIX regex engines.)
1585	///
1586	/// This example shows how leftmost-first semantics work, and how it even
1587	/// applies to multi-pattern regexes:
1588	///
1589	/// ```
1590	/// use regex_automata::{
1591	/// nfa::thompson::pikevm::PikeVM,
1592	/// Match,
1593	/// };
1594	///
1595	/// let re = PikeVM::new_many(&[
1596	/// r"foofoofoo",
1597	/// r"foofoo",
1598	/// r"foo",
1599	/// ])?;
1600	/// let mut cache = re.create_cache();
1601	/// let got: Vec<Match> = re.find_iter(&mut cache, "foofoo").collect();
1602	/// let expected = vec![Match::must(`1`, `0`..`6`)];
1603	/// assert_eq!(expected, got);
1604	///
1605	/// # Ok::<(), Box<dyn std::error::Error>>(())
1606	/// ```
1607	///
1608	/// # All matches
1609	///
1610	/// The `All` match semantics report any and all matches, and generally will
1611	/// attempt to match as much as possible. It doesn't respect any sort of match
1612	/// priority at all, so things like non-greedy matching don't work in this
1613	/// mode.
1614	///
1615	/// The fact that non-greedy matching doesn't work generally makes most forms
1616	/// of unanchored non-overlapping searches have unintuitive behavior. Namely,
1617	/// unanchored searches behave as if there is a `(?s-u:.)?` prefix at the*
1618	/// beginning of the pattern, which is specifically non-greedy. Since it will
1619	/// be treated as greedy in `All` match semantics, this generally means that
1620	/// it will first attempt to consume all of the haystack and is likely to wind
1621	/// up skipping matches.
1622	///
1623	/// Generally speaking, `All` should only be used in two circumstances:
1624	///
1625	/// When running an anchored search and there is a desire to match as much as*
1626	/// possible. For example, when building a reverse regex matcher to find the
1627	/// start of a match after finding the end. In this case, the reverse search
1628	/// is anchored to the end of the match found by the forward search.
1629	/// When running overlapping searches. Since `All` encodes all possible*
1630	/// matches, this is generally what you want for an overlapping search. If you
1631	/// try to use leftmost-first in an overlapping search, it is likely to produce
1632	/// counter-intuitive results since leftmost-first specifically excludes some
1633	/// matches from its underlying finite state machine.
1634	///
1635	/// This example demonstrates the counter-intuitive behavior of `All` semantics
1636	/// when using a standard leftmost unanchored search:
1637	///
1638	/// ```
1639	/// use regex_automata::{
1640	/// nfa::thompson::pikevm::PikeVM,
1641	/// Match, MatchKind,
1642	/// };
1643	///
1644	/// let re = PikeVM::builder()
1645	/// .configure(PikeVM::config().match_kind(MatchKind::All))
1646	/// .build("foo")?;
1647	/// let hay = "first foo second foo wat";
1648	/// let mut cache = re.create_cache();
1649	/// let got: Vec<Match> = re.find_iter(&mut cache, hay).collect();
1650	/// // Notice that it completely skips the first 'foo'!
1651	/// let expected = vec![Match::must(`0`, `17`..`20`)];
1652	/// assert_eq!(expected, got);
1653	///
1654	/// # Ok::<(), Box<dyn std::error::Error>>(())
1655	/// ```
1656	///
1657	/// This second example shows how `All` semantics are useful for an overlapping
1658	/// search. Note that we use lower level lazy DFA APIs here since the NFA
1659	/// engines only currently support a very limited form of overlapping search.
1660	///
1661	/// ```
1662	/// use regex_automata::{
1663	/// hybrid::dfa::{DFA, OverlappingState},
1664	/// HalfMatch, Input, MatchKind,
1665	/// };
1666	///
1667	/// let re = DFA::builder()
1668	/// // If we didn't set 'All' semantics here, then the regex would only
1669	/// // match 'foo' at offset 3 and nothing else. Why? Because the state
1670	/// // machine implements preference order and knows that the 'foofoo' and
1671	/// // 'foofoofoo' branches can never match since 'foo' will always match
1672	/// // when they match and take priority.
1673	/// .configure(DFA::config().match_kind(MatchKind::All))
1674	/// .build(r"foo\|foofoo\|foofoofoo")?;
1675	/// let mut cache = re.create_cache();
1676	/// let mut state = OverlappingState::start();
1677	/// let input = Input::new("foofoofoo");
1678	/// let mut got = vec![];
1679	/// loop {
1680	/// re.try_search_overlapping_fwd(&mut cache, &input, &mut state)?;
1681	/// let m = match state.get_match() {
1682	/// None => break,
1683	/// Some(m) => m,
1684	/// };
1685	/// got.push(m);
1686	/// }
1687	/// let expected = vec![
1688	/// HalfMatch::must(`0`, `3`),
1689	/// HalfMatch::must(`0`, `6`),
1690	/// HalfMatch::must(`0`, `9`),
1691	/// ];
1692	/// assert_eq!(expected, got);
1693	///
1694	/// # Ok::<(), Box<dyn std::error::Error>>(())
1695	/// ```
1696	#[non_exhaustive]
1697	#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1698	pub enum MatchKind {
1699	/// Report all possible matches.
1700	All,
1701	/// Report only the leftmost matches. When multiple leftmost matches exist,
1702	/// report the match corresponding to the part of the regex that appears
1703	/// first in the syntax.
1704	LeftmostFirst,
1705	// There is prior art in RE2 that shows that we should be able to add
1706	// LeftmostLongest too. The tricky part of it is supporting ungreedy
1707	// repetitions. Instead of treating all NFA states as having equivalent
1708	// priority (as in 'All') or treating all NFA states as having distinct
1709	// priority based on order (as in 'LeftmostFirst'), we instead group NFA
1710	// states into sets, and treat members of each set as having equivalent
1711	// priority, but having greater priority than all following members
1712	// of different sets.
1713	//
1714	// However, it's not clear whether it's really worth adding this. After
1715	// all, leftmost-longest can be emulated when using literals by using
1716	// leftmost-first and sorting the literals by length in descending order.
1717	// However, this won't work for arbitrary regexes. e.g., `\w\|\w\w` will
1718	// always match `a` in `ab` when using leftmost-first, but leftmost-longest
1719	// would match `ab`.
1720	}
1721
1722	impl MatchKind {
1723	#[cfg(feature = "alloc")]
1724	pub(crate) fn continue_past_first_match(&self) -> bool {
1725	*self == MatchKind::All
1726	}
1727	}
1728
1729	impl Default for MatchKind {
1730	fn default() -> MatchKind {
1731	MatchKind::LeftmostFirst
1732	}
1733	}
1734
1735	/// An error indicating that a search stopped before reporting whether a
1736	/// match exists or not.
1737	///
1738	/// To be very clear, this error type implies that one cannot assume that no
1739	/// matches occur, since the search stopped before completing. That is, if
1740	/// you're looking for information about where a search determined that no
1741	/// match can occur, then this error type does not* give you that. (Indeed, at*
1742	/// the time of writing, if you need such a thing, you have to write your own
1743	/// search routine.)
1744	///
1745	/// Normally, when one searches for something, the response is either an
1746	/// affirmative "it was found at this location" or a negative "not found at
1747	/// all." However, in some cases, a regex engine can be configured to stop its
1748	/// search before concluding whether a match exists or not. When this happens,
1749	/// it may be important for the caller to know why the regex engine gave up and
1750	/// where in the input it gave up at. This error type exposes the 'why' and the
1751	/// 'where.'
1752	///
1753	/// For example, the DFAs provided by this library generally cannot correctly
1754	/// implement Unicode word boundaries. Instead, they provide an option to
1755	/// eagerly support them on ASCII text (since Unicode word boundaries are
1756	/// equivalent to ASCII word boundaries when searching ASCII text), but will
1757	/// "give up" if a non-ASCII byte is seen. In such cases, one is usually
1758	/// required to either report the failure to the caller (unergonomic) or
1759	/// otherwise fall back to some other regex engine (ergonomic, but potentially
1760	/// costly).
1761	///
1762	/// More generally, some regex engines offer the ability for callers to specify
1763	/// certain bytes that will trigger the regex engine to automatically quit if
1764	/// they are seen.
1765	///
1766	/// Still yet, there may be other reasons for a failed match. For example,
1767	/// the hybrid DFA provided by this crate can be configured to give up if it
1768	/// believes that it is not efficient. This in turn permits callers to choose a
1769	/// different regex engine.
1770	///
1771	/// (Note that DFAs are configured by default to never quit or give up in this
1772	/// fashion. For example, by default, a DFA will fail to build if the regex
1773	/// pattern contains a Unicode word boundary. One needs to opt into the "quit"
1774	/// behavior via options, like
1775	/// [`hybrid::dfa::Config::unicode_word_boundary`](crate::hybrid::dfa::Config::unicode_word_boundary).)
1776	///
1777	/// There are a couple other ways a search
1778	/// can fail. For example, when using the
1779	/// [`BoundedBacktracker`](crate::nfa::thompson::backtrack::BoundedBacktracker)
1780	/// with a haystack that is too long, or trying to run an unanchored search
1781	/// with a [one-pass DFA](crate::dfa::onepass).
1782	#[derive(Clone, Debug, Eq, PartialEq)]
1783	pub struct MatchError(
1784	#[cfg(feature = "alloc")] alloc::boxed::Box<MatchErrorKind>,
1785	#[cfg(not(feature = "alloc"))] MatchErrorKind,
1786	);
1787
1788	impl MatchError {
1789	/// Create a new error value with the given kind.
1790	///
1791	/// This is a more verbose version of the kind-specific constructors,
1792	/// e.g., `MatchError::quit`.
1793	pub fn new(kind: MatchErrorKind) -> MatchError {
1794	#[cfg(feature = "alloc")]
1795	{
1796	MatchError(alloc::boxed::Box::new(kind))
1797	}
1798	#[cfg(not(feature = "alloc"))]
1799	{
1800	MatchError(kind)
1801	}
1802	}
1803
1804	/// Returns a reference to the underlying error kind.
1805	pub fn kind(&self) -> &MatchErrorKind {
1806	&self.0
1807	}
1808
1809	/// Create a new "quit" error. The given `byte` corresponds to the value
1810	/// that tripped a search's quit condition, and `offset` corresponds to the
1811	/// location in the haystack at which the search quit.
1812	///
1813	/// This is the same as calling `MatchError::new` with a
1814	/// [`MatchErrorKind::Quit`] kind.
1815	pub fn quit(byte: u8, offset: usize) -> MatchError {
1816	MatchError::new(MatchErrorKind::Quit { byte, offset })
1817	}
1818
1819	/// Create a new "gave up" error. The given `offset` corresponds to the
1820	/// location in the haystack at which the search gave up.
1821	///
1822	/// This is the same as calling `MatchError::new` with a
1823	/// [`MatchErrorKind::GaveUp`] kind.
1824	pub fn gave_up(offset: usize) -> MatchError {
1825	MatchError::new(MatchErrorKind::GaveUp { offset })
1826	}
1827
1828	/// Create a new "haystack too long" error. The given `len` corresponds to
1829	/// the length of the haystack that was problematic.
1830	///
1831	/// This is the same as calling `MatchError::new` with a
1832	/// [`MatchErrorKind::HaystackTooLong`] kind.
1833	pub fn haystack_too_long(len: usize) -> MatchError {
1834	MatchError::new(MatchErrorKind::HaystackTooLong { len })
1835	}
1836
1837	/// Create a new "unsupported anchored" error. This occurs when the caller
1838	/// requests a search with an anchor mode that is not supported by the
1839	/// regex engine.
1840	///
1841	/// This is the same as calling `MatchError::new` with a
1842	/// [`MatchErrorKind::UnsupportedAnchored`] kind.
1843	pub fn unsupported_anchored(mode: Anchored) -> MatchError {
1844	MatchError::new(MatchErrorKind::UnsupportedAnchored { mode })
1845	}
1846	}
1847
1848	/// The underlying kind of a [`MatchError`].
1849	///
1850	/// This is a non-exhaustive* enum. That means new variants may be added in*
1851	/// a semver-compatible release.
1852	#[non_exhaustive]
1853	#[derive(Clone, Debug, Eq, PartialEq)]
1854	pub enum MatchErrorKind {
1855	/// The search saw a "quit" byte at which it was instructed to stop
1856	/// searching.
1857	Quit {
1858	/// The "quit" byte that was observed that caused the search to stop.
1859	byte: u8,
1860	/// The offset at which the quit byte was observed.
1861	offset: usize,
1862	},
1863	/// The search, based on heuristics, determined that it would be better
1864	/// to stop, typically to provide the caller an opportunity to use an
1865	/// alternative regex engine.
1866	///
1867	/// Currently, the only way for this to occur is via the lazy DFA and
1868	/// only when it is configured to do so (it will not return this error by
1869	/// default).
1870	GaveUp {
1871	/// The offset at which the search stopped. This corresponds to the
1872	/// position immediately following the last byte scanned.
1873	offset: usize,
1874	},
1875	/// This error occurs if the haystack given to the regex engine was too
1876	/// long to be searched. This occurs, for example, with regex engines
1877	/// like the bounded backtracker that have a configurable fixed amount of
1878	/// capacity that is tied to the length of the haystack. Anything beyond
1879	/// that configured limit will result in an error at search time.
1880	HaystackTooLong {
1881	/// The length of the haystack that exceeded the limit.
1882	len: usize,
1883	},
1884	/// An error indicating that a particular type of anchored search was
1885	/// requested, but that the regex engine does not support it.
1886	///
1887	/// Note that this error should not be returned by a regex engine simply
1888	/// because the pattern ID is invalid (i.e., equal to or exceeds the number
1889	/// of patterns in the regex). In that case, the regex engine should report
1890	/// a non-match.
1891	UnsupportedAnchored {
1892	/// The anchored mode given that is unsupported.
1893	mode: Anchored,
1894	},
1895	}
1896
1897	#[cfg(feature = "std")]
1898	impl std::error::Error for MatchError {}
1899
1900	impl core::fmt::Display for MatchError {
1901	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1902	match *self.kind() {
1903	MatchErrorKind::Quit { byte, offset } => write!(
1904	f,
1905	"quit search after observing byte {:?} at offset {}",
1906	DebugByte(byte),
1907	offset,
1908	),
1909	MatchErrorKind::GaveUp { offset } => {
1910	write!(f, "gave up searching at offset {}", offset)
1911	}
1912	MatchErrorKind::HaystackTooLong { len } => {
1913	write!(f, "haystack of length {} is too long", len)
1914	}
1915	MatchErrorKind::UnsupportedAnchored { mode: Anchored::Yes } => {
1916	write!(f, "anchored searches are not supported or enabled")
1917	}
1918	MatchErrorKind::UnsupportedAnchored { mode: Anchored::No } => {
1919	write!(f, "unanchored searches are not supported or enabled")
1920	}
1921	MatchErrorKind::UnsupportedAnchored {
1922	mode: Anchored::Pattern(pid),
1923	} => {
1924	write!(
1925	f,
1926	"anchored searches for a specific pattern ({}) are \
1927	not supported or enabled",
1928	pid.as_usize(),
1929	)
1930	}
1931	}
1932	}
1933	}
1934
1935	#[cfg(test)]
1936	mod tests {
1937	use super::*;
1938
1939	// We test that our 'MatchError' type is the size we expect. This isn't an
1940	// API guarantee, but if the size increases, we really want to make sure we
1941	// decide to do that intentionally. So this should be a speed bump. And in
1942	// general, we should not increase the size without a very good reason.
1943	//
1944	// Why? Because low level search APIs return Result<.., MatchError>. When
1945	// MatchError gets bigger, so to does the Result type.
1946	//
1947	// Now, when 'alloc' is enabled, we do box the error, which de-emphasizes
1948	// the importance of keeping a small error type. But without 'alloc', we
1949	// still want things to be small.
1950	#[test]
1951	fn match_error_size() {
1952	let expected_size = if cfg!(feature = "alloc") {
1953	core::mem::size_of::<usize>()
1954	} else {
1955	`2` * core::mem::size_of::<usize>()
1956	};
1957	assert_eq!(expected_size, core::mem::size_of::<MatchError>());
1958	}
1959
1960	// Same as above, but for the underlying match error kind.
1961	#[cfg(target_pointer_width = "64")]
1962	#[test]
1963	fn match_error_kind_size() {
1964	let expected_size = `2` * core::mem::size_of::<usize>();
1965	assert_eq!(expected_size, core::mem::size_of::<MatchErrorKind>());
1966	}
1967
1968	#[cfg(target_pointer_width = "32")]
1969	#[test]
1970	fn match_error_kind_size() {
1971	let expected_size = `3` * core::mem::size_of::<usize>();
1972	assert_eq!(expected_size, core::mem::size_of::<MatchErrorKind>());
1973	}
1974
1975	#[test]
1976	fn incorrect_asref_guard() {
1977	struct Bad(std::cell::Cell<bool>);
1978
1979	impl AsRef<[u8]> for Bad {
1980	fn as_ref(&self) -> &[u8] {
1981	if self.0.replace(`false`) {
1982	&[]
1983	} else {
1984	&[`0`; `1000`]
1985	}
1986	}
1987	}
1988
1989	let bad = Bad(std::cell::Cell::new(`true`));
1990	let input = Input::new(&bad);
1991	assert!(input.end() <= input.haystack().len());
1992	}
1993	}
1994

Provided by KDAB

Definitions