captures.rs source code [crates/regex-automata-0.4.6/src/util/captures.rs]

1	/!*
2	Provides types for dealing with capturing groups.
3
4	Capturing groups refer to sub-patterns of regexes that some regex engines can
5	report matching offsets for. For example, matching `[a-z]([0-9]+)` against
6	`a789` would give `a789` as the overall match (for the implicit capturing group
7	at index `0`) and `789` as the match for the capturing group `([0-9]+)` (an
8	explicit capturing group at index `1`).
9
10	Not all regex engines can report match offsets for capturing groups. Indeed,
11	to a first approximation, regex engines that can report capturing group offsets
12	tend to be quite a bit slower than regex engines that can't. This is because
13	tracking capturing groups at search time usually requires more "power" that
14	in turn adds overhead.
15
16	Other regex implementations might call capturing groups "submatches."
17
18	# Overview
19
20	The main types in this module are:
21
22	* [`Captures`] records the capturing group offsets found during a search. It
23	provides convenience routines for looking up capturing group offsets by either
24	index or name.
25	* [`GroupInfo`] records the mapping between capturing groups and "slots,"
26	where the latter are how capturing groups are recorded during a regex search.
27	This also keeps a mapping from capturing group name to index, and capture
28	group index to name. A `GroupInfo` is used by `Captures` internally to
29	provide a convenient API. It is unlikely that you'll use a `GroupInfo`
30	directly, but for example, if you've compiled an Thompson NFA, then you can use
31	[`thompson::NFA::group_info`](crate::nfa::thompson::NFA::group_info) to get its
32	underlying `GroupInfo`.
33	*/
34
35	use alloc::{string::String, sync::Arc, vec, vec::Vec};
36
37	use crate::util::{
38	interpolate,
39	primitives::{
40	NonMaxUsize, PatternID, PatternIDError, PatternIDIter, SmallIndex,
41	},
42	search::{Match, Span},
43	};
44
45	/// The span offsets of capturing groups after a match has been found.
46	///
47	/// This type represents the output of regex engines that can report the
48	/// offsets at which capturing groups matches or "submatches" occur. For
49	/// example, the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM). When a match
50	/// occurs, it will at minimum contain the [`PatternID`] of the pattern that
51	/// matched. Depending upon how it was constructed, it may also contain the
52	/// start/end offsets of the entire match of the pattern and the start/end
53	/// offsets of each capturing group that participated in the match.
54	///
55	/// Values of this type are always created for a specific [`GroupInfo`]. It is
56	/// unspecified behavior to use a `Captures` value in a search with any regex
57	/// engine that has a different `GroupInfo` than the one the `Captures` were
58	/// created with.
59	///
60	/// # Constructors
61	///
62	/// There are three constructors for this type that control what kind of
63	/// information is available upon a match:
64	///
65	/// * [`Captures::all`]: Will store overall pattern match offsets in addition
66	/// to the offsets of capturing groups that participated in the match.
67	/// * [`Captures::matches`]: Will store only the overall pattern
68	/// match offsets. The offsets of capturing groups (even ones that participated
69	/// in the match) are not available.
70	/// * [`Captures::empty`]: Will only store the pattern ID that matched. No
71	/// match offsets are available at all.
72	///
73	/// If you aren't sure which to choose, then pick the first one. The first one
74	/// is what convenience routines like,
75	/// [`PikeVM::create_captures`](crate::nfa::thompson::pikevm::PikeVM::create_captures),
76	/// will use automatically.
77	///
78	/// The main difference between these choices is performance. Namely, if you
79	/// ask for _less_ information, then the execution of regex search may be able
80	/// to run more quickly.
81	///
82	/// # Notes
83	///
84	/// It is worth pointing out that this type is not coupled to any one specific
85	/// regex engine. Instead, its coupling is with [`GroupInfo`], which is the
86	/// thing that is responsible for mapping capturing groups to "slot" offsets.
87	/// Slot offsets are indices into a single sequence of memory at which matching
88	/// haystack offsets for the corresponding group are written by regex engines.
89	///
90	/// # Example
91	///
92	/// This example shows how to parse a simple date and extract the components of
93	/// the date via capturing groups:
94	///
95	/// ```
96	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
97	///
98	/// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
99	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
100	///
101	/// re.captures(&mut cache, "2010-03-14", &mut caps);
102	/// assert!(caps.is_match());
103	/// assert_eq!(Some(Span::from(`0`..`4`)), caps.get_group(`1`));
104	/// assert_eq!(Some(Span::from(`5`..`7`)), caps.get_group(`2`));
105	/// assert_eq!(Some(Span::from(`8`..`10`)), caps.get_group(`3`));
106	///
107	/// # Ok::<(), Box<dyn std::error::Error>>(())
108	/// ```
109	///
110	/// # Example: named capturing groups
111	///
112	/// This example is like the one above, but leverages the ability to name
113	/// capturing groups in order to make the code a bit clearer:
114	///
115	/// ```
116	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
117	///
118	/// let re = PikeVM::new(r"^(?P<y>[0-9]{4})-(?P<m>[0-9]{2})-(?P<d>[0-9]{2})$")?;
119	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
120	///
121	/// re.captures(&mut cache, "2010-03-14", &mut caps);
122	/// assert!(caps.is_match());
123	/// assert_eq!(Some(Span::from(`0`..`4`)), caps.get_group_by_name("y"));
124	/// assert_eq!(Some(Span::from(`5`..`7`)), caps.get_group_by_name("m"));
125	/// assert_eq!(Some(Span::from(`8`..`10`)), caps.get_group_by_name("d"));
126	///
127	/// # Ok::<(), Box<dyn std::error::Error>>(())
128	/// ```
129	#[derive(Clone)]
130	pub struct Captures {
131	/// The group info that these capture groups are coupled to. This is what
132	/// gives the "convenience" of the `Captures` API. Namely, it provides the
133	/// slot mapping and the name\|-->index mapping for capture lookups by name.
134	group_info: GroupInfo,
135	/// The ID of the pattern that matched. Regex engines must set this to
136	/// None when no match occurs.
137	pid: Option<PatternID>,
138	/// The slot values, i.e., submatch offsets.
139	///
140	/// In theory, the smallest sequence of slots would be something like
141	/// `max(groups(pattern) for pattern in regex) 2`, but instead, we use*
142	/// `sum(groups(pattern) for pattern in regex) 2`. Why?*
143	///
144	/// Well, the former could be used in theory, because we don't generally
145	/// have any overlapping APIs that involve capturing groups. Therefore,
146	/// there's technically never any need to have slots set for multiple
147	/// patterns. However, this might change some day, in which case, we would
148	/// need to have slots available.
149	///
150	/// The other reason is that during the execution of some regex engines,
151	/// there exists a point in time where multiple slots for different
152	/// patterns may be written to before knowing which pattern has matched.
153	/// Therefore, the regex engines themselves, in order to support multiple
154	/// patterns correctly, must have all slots available. If `Captures`
155	/// doesn't have all slots available, then regex engines can't write
156	/// directly into the caller provided `Captures` and must instead write
157	/// into some other storage and then copy the slots involved in the match
158	/// at the end of the search.
159	///
160	/// So overall, at least as of the time of writing, it seems like the path
161	/// of least resistance is to just require allocating all possible slots
162	/// instead of the conceptual minimum. Another way to justify this is that
163	/// the most common case is a single pattern, in which case, there is no
164	/// inefficiency here since the 'max' and 'sum' calculations above are
165	/// equivalent in that case.
166	///
167	/// N.B. The mapping from group index to slot is maintained by `GroupInfo`
168	/// and is considered an API guarantee. See `GroupInfo` for more details on
169	/// that mapping.
170	///
171	/// N.B. `Option<NonMaxUsize>` has the same size as a `usize`.
172	slots: Vec<Option<NonMaxUsize>>,
173	}
174
175	impl Captures {
176	/// Create new storage for the offsets of all matching capturing groups.
177	///
178	/// This routine provides the most information for matches---namely, the
179	/// spans of matching capturing groups---but also requires the regex search
180	/// routines to do the most work.
181	///
182	/// It is unspecified behavior to use the returned `Captures` value in a
183	/// search with a `GroupInfo` other than the one that is provided to this
184	/// constructor.
185	///
186	/// # Example
187	///
188	/// This example shows that all capturing groups---but only ones that
189	/// participated in a match---are available to query after a match has
190	/// been found:
191	///
192	/// ```
193	/// use regex_automata::{
194	/// nfa::thompson::pikevm::PikeVM,
195	/// util::captures::Captures,
196	/// Span, Match,
197	/// };
198	///
199	/// let re = PikeVM::new(
200	/// r"^(?:(?P<lower>[a-z]+)\|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$",
201	/// )?;
202	/// let mut cache = re.create_cache();
203	/// let mut caps = Captures::all(re.get_nfa().group_info().clone());
204	///
205	/// re.captures(&mut cache, "ABC123", &mut caps);
206	/// assert!(caps.is_match());
207	/// assert_eq!(Some(Match::must(`0`, `0`..`6`)), caps.get_match());
208	/// // The 'lower' group didn't match, so it won't have any offsets.
209	/// assert_eq!(None, caps.get_group_by_name("lower"));
210	/// assert_eq!(Some(Span::from(`0`..`3`)), caps.get_group_by_name("upper"));
211	/// assert_eq!(Some(Span::from(`3`..`6`)), caps.get_group_by_name("digits"));
212	///
213	/// # Ok::<(), Box<dyn std::error::Error>>(())
214	/// ```
215	pub fn all(group_info: GroupInfo) -> Captures {
216	let slots = group_info.slot_len();
217	Captures { group_info, pid: None, slots: vec![None; slots] }
218	}
219
220	/// Create new storage for only the full match spans of a pattern. This
221	/// does not include any capturing group offsets.
222	///
223	/// It is unspecified behavior to use the returned `Captures` value in a
224	/// search with a `GroupInfo` other than the one that is provided to this
225	/// constructor.
226	///
227	/// # Example
228	///
229	/// This example shows that only overall match offsets are reported when
230	/// this constructor is used. Accessing any capturing groups other than
231	/// the 0th will always return `None`.
232	///
233	/// ```
234	/// use regex_automata::{
235	/// nfa::thompson::pikevm::PikeVM,
236	/// util::captures::Captures,
237	/// Match,
238	/// };
239	///
240	/// let re = PikeVM::new(
241	/// r"^(?:(?P<lower>[a-z]+)\|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$",
242	/// )?;
243	/// let mut cache = re.create_cache();
244	/// let mut caps = Captures::matches(re.get_nfa().group_info().clone());
245	///
246	/// re.captures(&mut cache, "ABC123", &mut caps);
247	/// assert!(caps.is_match());
248	/// assert_eq!(Some(Match::must(`0`, `0`..`6`)), caps.get_match());
249	/// // We didn't ask for capturing group offsets, so they aren't available.
250	/// assert_eq!(None, caps.get_group_by_name("lower"));
251	/// assert_eq!(None, caps.get_group_by_name("upper"));
252	/// assert_eq!(None, caps.get_group_by_name("digits"));
253	///
254	/// # Ok::<(), Box<dyn std::error::Error>>(())
255	/// ```
256	pub fn matches(group_info: GroupInfo) -> Captures {
257	// This is OK because we know there are at least this many slots,
258	// and GroupInfo construction guarantees that the number of slots fits
259	// into a usize.
260	let slots = group_info.pattern_len().checked_mul(`2`).unwrap();
261	Captures { group_info, pid: None, slots: vec![None; slots] }
262	}
263
264	/// Create new storage for only tracking which pattern matched. No offsets
265	/// are stored at all.
266	///
267	/// It is unspecified behavior to use the returned `Captures` value in a
268	/// search with a `GroupInfo` other than the one that is provided to this
269	/// constructor.
270	///
271	/// # Example
272	///
273	/// This example shows that only the pattern that matched can be accessed
274	/// from a `Captures` value created via this constructor.
275	///
276	/// ```
277	/// use regex_automata::{
278	/// nfa::thompson::pikevm::PikeVM,
279	/// util::captures::Captures,
280	/// PatternID,
281	/// };
282	///
283	/// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
284	/// let mut cache = re.create_cache();
285	/// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
286	///
287	/// re.captures(&mut cache, "aABCz", &mut caps);
288	/// assert!(caps.is_match());
289	/// assert_eq!(Some(PatternID::must(`0`)), caps.pattern());
290	/// // We didn't ask for any offsets, so they aren't available.
291	/// assert_eq!(None, caps.get_match());
292	///
293	/// re.captures(&mut cache, &"aABCz"[`1`..], &mut caps);
294	/// assert!(caps.is_match());
295	/// assert_eq!(Some(PatternID::must(`1`)), caps.pattern());
296	/// // We didn't ask for any offsets, so they aren't available.
297	/// assert_eq!(None, caps.get_match());
298	///
299	/// # Ok::<(), Box<dyn std::error::Error>>(())
300	/// ```
301	pub fn empty(group_info: GroupInfo) -> Captures {
302	Captures { group_info, pid: None, slots: vec![] }
303	}
304
305	/// Returns true if and only if this capturing group represents a match.
306	///
307	/// This is a convenience routine for `caps.pattern().is_some()`.
308	///
309	/// # Example
310	///
311	/// When using the PikeVM (for example), the lightest weight way of
312	/// detecting whether a match exists is to create capturing groups that
313	/// only track the ID of the pattern that match (if any):
314	///
315	/// ```
316	/// use regex_automata::{
317	/// nfa::thompson::pikevm::PikeVM,
318	/// util::captures::Captures,
319	/// };
320	///
321	/// let re = PikeVM::new(r"[a-z]+")?;
322	/// let mut cache = re.create_cache();
323	/// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
324	///
325	/// re.captures(&mut cache, "aABCz", &mut caps);
326	/// assert!(caps.is_match());
327	///
328	/// # Ok::<(), Box<dyn std::error::Error>>(())
329	/// ```
330	#[inline]
331	pub fn is_match(&self) -> bool {
332	self.pid.is_some()
333	}
334
335	/// Returns the identifier of the pattern that matched when this
336	/// capturing group represents a match. If no match was found, then this
337	/// always returns `None`.
338	///
339	/// This returns a pattern ID in precisely the cases in which `is_match`
340	/// returns `true`. Similarly, the pattern ID returned is always the
341	/// same pattern ID found in the `Match` returned by `get_match`.
342	///
343	/// # Example
344	///
345	/// When using the PikeVM (for example), the lightest weight way of
346	/// detecting which pattern matched is to create capturing groups that only
347	/// track the ID of the pattern that match (if any):
348	///
349	/// ```
350	/// use regex_automata::{
351	/// nfa::thompson::pikevm::PikeVM,
352	/// util::captures::Captures,
353	/// PatternID,
354	/// };
355	///
356	/// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
357	/// let mut cache = re.create_cache();
358	/// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
359	///
360	/// re.captures(&mut cache, "ABC", &mut caps);
361	/// assert_eq!(Some(PatternID::must(`1`)), caps.pattern());
362	/// // Recall that offsets are only available when using a non-empty
363	/// // Captures value. So even though a match occurred, this returns None!
364	/// assert_eq!(None, caps.get_match());
365	///
366	/// # Ok::<(), Box<dyn std::error::Error>>(())
367	/// ```
368	#[inline]
369	pub fn pattern(&self) -> Option<PatternID> {
370	self.pid
371	}
372
373	/// Returns the pattern ID and the span of the match, if one occurred.
374	///
375	/// This always returns `None` when `Captures` was created with
376	/// [`Captures::empty`], even if a match was found.
377	///
378	/// If this routine returns a non-`None` value, then `is_match` is
379	/// guaranteed to return `true` and `pattern` is also guaranteed to return
380	/// a non-`None` value.
381	///
382	/// # Example
383	///
384	/// This example shows how to get the full match from a search:
385	///
386	/// ```
387	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
388	///
389	/// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
390	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
391	///
392	/// re.captures(&mut cache, "ABC", &mut caps);
393	/// assert_eq!(Some(Match::must(`1`, `0`..`3`)), caps.get_match());
394	///
395	/// # Ok::<(), Box<dyn std::error::Error>>(())
396	/// ```
397	#[inline]
398	pub fn get_match(&self) -> Option<Match> {
399	Some(Match::new(self.pattern()?, self.get_group(`0`)?))
400	}
401
402	/// Returns the span of a capturing group match corresponding to the group
403	/// index given, only if both the overall pattern matched and the capturing
404	/// group participated in that match.
405	///
406	/// This returns `None` if `index` is invalid. `index` is valid if and only
407	/// if it's less than [`Captures::group_len`] for the matching pattern.
408	///
409	/// This always returns `None` when `Captures` was created with
410	/// [`Captures::empty`], even if a match was found. This also always
411	/// returns `None` for any `index > 0` when `Captures` was created with
412	/// [`Captures::matches`].
413	///
414	/// If this routine returns a non-`None` value, then `is_match` is
415	/// guaranteed to return `true`, `pattern` is guaranteed to return a
416	/// non-`None` value and `get_match` is guaranteed to return a non-`None`
417	/// value.
418	///
419	/// By convention, the 0th capture group will always return the same
420	/// span as the span returned by `get_match`. This is because the 0th
421	/// capture group always corresponds to the entirety of the pattern's
422	/// match. (It is similarly always unnamed because it is implicit.) This
423	/// isn't necessarily true of all regex engines. For example, one can
424	/// hand-compile a [`thompson::NFA`](crate::nfa::thompson::NFA) via a
425	/// [`thompson::Builder`](crate::nfa::thompson::Builder), which isn't
426	/// technically forced to make the 0th capturing group always correspond to
427	/// the entire match.
428	///
429	/// # Example
430	///
431	/// This example shows how to get the capturing groups, by index, from a
432	/// match:
433	///
434	/// ```
435	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
436	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match};
437	///
438	/// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
439	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
440	///
441	/// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
442	/// assert_eq!(Some(Match::must(`0`, `0`..`17`)), caps.get_match());
443	/// assert_eq!(Some(Span::from(`0`..`5`)), caps.get_group(`1`));
444	/// assert_eq!(Some(Span::from(`6`..`17`)), caps.get_group(`2`));
445	/// // Looking for a non-existent capturing group will return None:
446	/// assert_eq!(None, caps.get_group(`3`));
447	/// # // literals are too big for 32-bit usize: #1039
448	/// # #[cfg(target_pointer_width = "64")]
449	/// assert_eq!(None, caps.get_group(`9944060567225171988`));
450	///
451	/// # Ok::<(), Box<dyn std::error::Error>>(())
452	/// ```
453	#[inline]
454	pub fn get_group(&self, index: usize) -> Option<Span> {
455	let pid = self.pattern()?;
456	// There's a little bit of work needed to map captures to slots in the
457	// fully general case. But in the overwhelming common case of a single
458	// pattern, we can just do some simple arithmetic.
459	let (slot_start, slot_end) = if self.group_info().pattern_len() == `1` {
460	(index.checked_mul(`2`)?, index.checked_mul(`2`)?.checked_add(`1`)?)
461	} else {
462	self.group_info().slots(pid, index)?
463	};
464	let start = self.slots.get(slot_start).copied()??;
465	let end = self.slots.get(slot_end).copied()??;
466	Some(Span { start: start.get(), end: end.get() })
467	}
468
469	/// Returns the span of a capturing group match corresponding to the group
470	/// name given, only if both the overall pattern matched and the capturing
471	/// group participated in that match.
472	///
473	/// This returns `None` if `name` does not correspond to a valid capturing
474	/// group for the pattern that matched.
475	///
476	/// This always returns `None` when `Captures` was created with
477	/// [`Captures::empty`], even if a match was found. This also always
478	/// returns `None` for any `index > 0` when `Captures` was created with
479	/// [`Captures::matches`].
480	///
481	/// If this routine returns a non-`None` value, then `is_match` is
482	/// guaranteed to return `true`, `pattern` is guaranteed to return a
483	/// non-`None` value and `get_match` is guaranteed to return a non-`None`
484	/// value.
485	///
486	/// # Example
487	///
488	/// This example shows how to get the capturing groups, by name, from a
489	/// match:
490	///
491	/// ```
492	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
493	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match};
494	///
495	/// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
496	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
497	///
498	/// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
499	/// assert_eq!(Some(Match::must(`0`, `0`..`17`)), caps.get_match());
500	/// assert_eq!(Some(Span::from(`0`..`5`)), caps.get_group_by_name("first"));
501	/// assert_eq!(Some(Span::from(`6`..`17`)), caps.get_group_by_name("last"));
502	/// // Looking for a non-existent capturing group will return None:
503	/// assert_eq!(None, caps.get_group_by_name("middle"));
504	///
505	/// # Ok::<(), Box<dyn std::error::Error>>(())
506	/// ```
507	pub fn get_group_by_name(&self, name: &str) -> Option<Span> {
508	let index = self.group_info().to_index(self.pattern()?, name)?;
509	self.get_group(index)
510	}
511
512	/// Returns an iterator of possible spans for every capturing group in the
513	/// matching pattern.
514	///
515	/// If this `Captures` value does not correspond to a match, then the
516	/// iterator returned yields no elements.
517	///
518	/// Note that the iterator returned yields elements of type `Option<Span>`.
519	/// A span is present if and only if it corresponds to a capturing group
520	/// that participated in a match.
521	///
522	/// # Example
523	///
524	/// This example shows how to collect all capturing groups:
525	///
526	/// ```
527	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
528	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
529	///
530	/// let re = PikeVM::new(
531	/// // Matches first/last names, with an optional middle name.
532	/// r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
533	/// )?;
534	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
535	///
536	/// re.captures(&mut cache, "Harry James Potter", &mut caps);
537	/// assert!(caps.is_match());
538	/// let groups: Vec<Option<Span>> = caps.iter().collect();
539	/// assert_eq!(groups, vec![
540	/// Some(Span::from(`0`..`18`)),
541	/// Some(Span::from(`0`..`5`)),
542	/// Some(Span::from(`6`..`11`)),
543	/// Some(Span::from(`12`..`18`)),
544	/// ]);
545	///
546	/// # Ok::<(), Box<dyn std::error::Error>>(())
547	/// ```
548	///
549	/// This example uses the same regex as the previous example, but with a
550	/// haystack that omits the middle name. This results in a capturing group
551	/// that is present in the elements yielded by the iterator but without a
552	/// match:
553	///
554	/// ```
555	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
556	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
557	///
558	/// let re = PikeVM::new(
559	/// // Matches first/last names, with an optional middle name.
560	/// r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
561	/// )?;
562	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
563	///
564	/// re.captures(&mut cache, "Harry Potter", &mut caps);
565	/// assert!(caps.is_match());
566	/// let groups: Vec<Option<Span>> = caps.iter().collect();
567	/// assert_eq!(groups, vec![
568	/// Some(Span::from(`0`..`12`)),
569	/// Some(Span::from(`0`..`5`)),
570	/// None,
571	/// Some(Span::from(`6`..`12`)),
572	/// ]);
573	///
574	/// # Ok::<(), Box<dyn std::error::Error>>(())
575	/// ```
576	pub fn iter(&self) -> CapturesPatternIter<'_> {
577	let names = self
578	.pattern()
579	.map_or(GroupInfoPatternNames::empty().enumerate(), \|pid\| {
580	self.group_info().pattern_names(pid).enumerate()
581	});
582	CapturesPatternIter { caps: self, names }
583	}
584
585	/// Return the total number of capturing groups for the matching pattern.
586	///
587	/// If this `Captures` value does not correspond to a match, then this
588	/// always returns `0`.
589	///
590	/// This always returns the same number of elements yielded by
591	/// [`Captures::iter`]. That is, the number includes capturing groups even
592	/// if they don't participate in the match.
593	///
594	/// # Example
595	///
596	/// This example shows how to count the total number of capturing groups
597	/// associated with a pattern. Notice that it includes groups that did not
598	/// participate in a match (just like `Captures::iter` does).
599	///
600	/// ```
601	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
602	/// use regex_automata::nfa::thompson::pikevm::PikeVM;
603	///
604	/// let re = PikeVM::new(
605	/// // Matches first/last names, with an optional middle name.
606	/// r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
607	/// )?;
608	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
609	///
610	/// re.captures(&mut cache, "Harry Potter", &mut caps);
611	/// assert_eq!(`4`, caps.group_len());
612	///
613	/// # Ok::<(), Box<dyn std::error::Error>>(())
614	/// ```
615	pub fn group_len(&self) -> usize {
616	let pid = match self.pattern() {
617	None => return `0`,
618	Some(pid) => pid,
619	};
620	self.group_info().group_len(pid)
621	}
622
623	/// Returns a reference to the underlying group info on which these
624	/// captures are based.
625	///
626	/// The difference between `GroupInfo` and `Captures` is that the former
627	/// defines the structure of capturing groups where as the latter is what
628	/// stores the actual match information. So where as `Captures` only gives
629	/// you access to the current match, `GroupInfo` lets you query any
630	/// information about all capturing groups, even ones for patterns that
631	/// weren't involved in a match.
632	///
633	/// Note that a `GroupInfo` uses reference counting internally, so it may
634	/// be cloned cheaply.
635	///
636	/// # Example
637	///
638	/// This example shows how to get all capturing group names from the
639	/// underlying `GroupInfo`. Notice that we don't even need to run a
640	/// search.
641	///
642	/// ```
643	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
644	///
645	/// let re = PikeVM::new_many(&[
646	/// r"(?P<foo>a)",
647	/// r"(a)(b)",
648	/// r"ab",
649	/// r"(?P<bar>a)(?P<quux>a)",
650	/// r"(?P<foo>z)",
651	/// ])?;
652	/// let caps = re.create_captures();
653	///
654	/// let expected = vec![
655	/// (PatternID::must(`0`), `0`, None),
656	/// (PatternID::must(`0`), `1`, Some("foo")),
657	/// (PatternID::must(`1`), `0`, None),
658	/// (PatternID::must(`1`), `1`, None),
659	/// (PatternID::must(`1`), `2`, None),
660	/// (PatternID::must(`2`), `0`, None),
661	/// (PatternID::must(`3`), `0`, None),
662	/// (PatternID::must(`3`), `1`, Some("bar")),
663	/// (PatternID::must(`3`), `2`, Some("quux")),
664	/// (PatternID::must(`4`), `0`, None),
665	/// (PatternID::must(`4`), `1`, Some("foo")),
666	/// ];
667	/// // We could also just use 're.get_nfa().group_info()'.
668	/// let got: Vec<(PatternID, usize, Option<&str>)> =
669	/// caps.group_info().all_names().collect();
670	/// assert_eq!(expected, got);
671	///
672	/// # Ok::<(), Box<dyn std::error::Error>>(())
673	/// ```
674	pub fn group_info(&self) -> &GroupInfo {
675	&self.group_info
676	}
677
678	/// Interpolates the capture references in `replacement` with the
679	/// corresponding substrings in `haystack` matched by each reference. The
680	/// interpolated string is returned.
681	///
682	/// See the [`interpolate` module](interpolate) for documentation on the
683	/// format of the replacement string.
684	///
685	/// # Example
686	///
687	/// This example shows how to use interpolation, and also shows how it
688	/// can work with multi-pattern regexes.
689	///
690	/// ```
691	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
692	///
693	/// let re = PikeVM::new_many(&[
694	/// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
695	/// r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
696	/// ])?;
697	/// let mut cache = re.create_cache();
698	/// let mut caps = re.create_captures();
699	///
700	/// let replacement = "year=$year, month=$month, day=$day";
701	///
702	/// // This matches the first pattern.
703	/// let hay = "On 14-03-2010, I became a Tenneessee lamb.";
704	/// re.captures(&mut cache, hay, &mut caps);
705	/// let result = caps.interpolate_string(hay, replacement);
706	/// assert_eq!("year=2010, month=03, day=14", result);
707	///
708	/// // And this matches the second pattern.
709	/// let hay = "On 2010-03-14, I became a Tenneessee lamb.";
710	/// re.captures(&mut cache, hay, &mut caps);
711	/// let result = caps.interpolate_string(hay, replacement);
712	/// assert_eq!("year=2010, month=03, day=14", result);
713	///
714	/// # Ok::<(), Box<dyn std::error::Error>>(())
715	/// ```
716	pub fn interpolate_string(
717	&self,
718	haystack: &str,
719	replacement: &str,
720	) -> String {
721	let mut dst = String::new();
722	self.interpolate_string_into(haystack, replacement, &mut dst);
723	dst
724	}
725
726	/// Interpolates the capture references in `replacement` with the
727	/// corresponding substrings in `haystack` matched by each reference. The
728	/// interpolated string is written to `dst`.
729	///
730	/// See the [`interpolate` module](interpolate) for documentation on the
731	/// format of the replacement string.
732	///
733	/// # Example
734	///
735	/// This example shows how to use interpolation, and also shows how it
736	/// can work with multi-pattern regexes.
737	///
738	/// ```
739	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
740	///
741	/// let re = PikeVM::new_many(&[
742	/// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
743	/// r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
744	/// ])?;
745	/// let mut cache = re.create_cache();
746	/// let mut caps = re.create_captures();
747	///
748	/// let replacement = "year=$year, month=$month, day=$day";
749	///
750	/// // This matches the first pattern.
751	/// let hay = "On 14-03-2010, I became a Tenneessee lamb.";
752	/// re.captures(&mut cache, hay, &mut caps);
753	/// let mut dst = String::new();
754	/// caps.interpolate_string_into(hay, replacement, &mut dst);
755	/// assert_eq!("year=2010, month=03, day=14", dst);
756	///
757	/// // And this matches the second pattern.
758	/// let hay = "On 2010-03-14, I became a Tenneessee lamb.";
759	/// re.captures(&mut cache, hay, &mut caps);
760	/// let mut dst = String::new();
761	/// caps.interpolate_string_into(hay, replacement, &mut dst);
762	/// assert_eq!("year=2010, month=03, day=14", dst);
763	///
764	/// # Ok::<(), Box<dyn std::error::Error>>(())
765	/// ```
766	pub fn interpolate_string_into(
767	&self,
768	haystack: &str,
769	replacement: &str,
770	dst: &mut String,
771	) {
772	interpolate::string(
773	replacement,
774	\|index, dst\| {
775	let span = match self.get_group(index) {
776	None => return,
777	Some(span) => span,
778	};
779	dst.push_str(&haystack[span]);
780	},
781	\|name\| self.group_info().to_index(self.pattern()?, name),
782	dst,
783	);
784	}
785
786	/// Interpolates the capture references in `replacement` with the
787	/// corresponding substrings in `haystack` matched by each reference. The
788	/// interpolated byte string is returned.
789	///
790	/// See the [`interpolate` module](interpolate) for documentation on the
791	/// format of the replacement string.
792	///
793	/// # Example
794	///
795	/// This example shows how to use interpolation, and also shows how it
796	/// can work with multi-pattern regexes.
797	///
798	/// ```
799	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
800	///
801	/// let re = PikeVM::new_many(&[
802	/// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
803	/// r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
804	/// ])?;
805	/// let mut cache = re.create_cache();
806	/// let mut caps = re.create_captures();
807	///
808	/// let replacement = b"year=$year, month=$month, day=$day";
809	///
810	/// // This matches the first pattern.
811	/// let hay = b"On 14-03-2010, I became a Tenneessee lamb.";
812	/// re.captures(&mut cache, hay, &mut caps);
813	/// let result = caps.interpolate_bytes(hay, replacement);
814	/// assert_eq!(&b"year=2010, month=03, day=14"[..], result);
815	///
816	/// // And this matches the second pattern.
817	/// let hay = b"On 2010-03-14, I became a Tenneessee lamb.";
818	/// re.captures(&mut cache, hay, &mut caps);
819	/// let result = caps.interpolate_bytes(hay, replacement);
820	/// assert_eq!(&b"year=2010, month=03, day=14"[..], result);
821	///
822	/// # Ok::<(), Box<dyn std::error::Error>>(())
823	/// ```
824	pub fn interpolate_bytes(
825	&self,
826	haystack: &[u8],
827	replacement: &[u8],
828	) -> Vec<u8> {
829	let mut dst = vec![];
830	self.interpolate_bytes_into(haystack, replacement, &mut dst);
831	dst
832	}
833
834	/// Interpolates the capture references in `replacement` with the
835	/// corresponding substrings in `haystack` matched by each reference. The
836	/// interpolated byte string is written to `dst`.
837	///
838	/// See the [`interpolate` module](interpolate) for documentation on the
839	/// format of the replacement string.
840	///
841	/// # Example
842	///
843	/// This example shows how to use interpolation, and also shows how it
844	/// can work with multi-pattern regexes.
845	///
846	/// ```
847	/// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
848	///
849	/// let re = PikeVM::new_many(&[
850	/// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
851	/// r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
852	/// ])?;
853	/// let mut cache = re.create_cache();
854	/// let mut caps = re.create_captures();
855	///
856	/// let replacement = b"year=$year, month=$month, day=$day";
857	///
858	/// // This matches the first pattern.
859	/// let hay = b"On 14-03-2010, I became a Tenneessee lamb.";
860	/// re.captures(&mut cache, hay, &mut caps);
861	/// let mut dst = vec![];
862	/// caps.interpolate_bytes_into(hay, replacement, &mut dst);
863	/// assert_eq!(&b"year=2010, month=03, day=14"[..], dst);
864	///
865	/// // And this matches the second pattern.
866	/// let hay = b"On 2010-03-14, I became a Tenneessee lamb.";
867	/// re.captures(&mut cache, hay, &mut caps);
868	/// let mut dst = vec![];
869	/// caps.interpolate_bytes_into(hay, replacement, &mut dst);
870	/// assert_eq!(&b"year=2010, month=03, day=14"[..], dst);
871	///
872	/// # Ok::<(), Box<dyn std::error::Error>>(())
873	/// ```
874	pub fn interpolate_bytes_into(
875	&self,
876	haystack: &[u8],
877	replacement: &[u8],
878	dst: &mut Vec<u8>,
879	) {
880	interpolate::bytes(
881	replacement,
882	\|index, dst\| {
883	let span = match self.get_group(index) {
884	None => return,
885	Some(span) => span,
886	};
887	dst.extend_from_slice(&haystack[span]);
888	},
889	\|name\| self.group_info().to_index(self.pattern()?, name),
890	dst,
891	);
892	}
893
894	/// This is a convenience routine for extracting the substrings
895	/// corresponding to matching capture groups in the given `haystack`. The
896	/// `haystack` should be the same substring used to find the match spans in
897	/// this `Captures` value.
898	///
899	/// This is identical to [`Captures::extract_bytes`], except it works with
900	/// `&str` instead of `&[u8]`.
901	///
902	/// # Panics
903	///
904	/// This panics if the number of explicit matching groups in this
905	/// `Captures` value is less than `N`. This also panics if this `Captures`
906	/// value does not correspond to a match.
907	///
908	/// Note that this does not* panic if the number of explicit matching*
909	/// groups is bigger than `N`. In that case, only the first `N` matching
910	/// groups are extracted.
911	///
912	/// # Example
913	///
914	/// ```
915	/// use regex_automata::nfa::thompson::pikevm::PikeVM;
916	///
917	/// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?;
918	/// let mut cache = re.create_cache();
919	/// let mut caps = re.create_captures();
920	///
921	/// let hay = "On 2010-03-14, I became a Tenneessee lamb.";
922	/// re.captures(&mut cache, hay, &mut caps);
923	/// assert!(caps.is_match());
924	/// let (full, [year, month, day]) = caps.extract(hay);
925	/// assert_eq!("2010-03-14", full);
926	/// assert_eq!("2010", year);
927	/// assert_eq!("03", month);
928	/// assert_eq!("14", day);
929	///
930	/// // We can also ask for fewer than all capture groups.
931	/// let (full, [year]) = caps.extract(hay);
932	/// assert_eq!("2010-03-14", full);
933	/// assert_eq!("2010", year);
934	///
935	/// # Ok::<(), Box<dyn std::error::Error>>(())
936	/// ```
937	pub fn extract<'h, const N: usize>(
938	&self,
939	haystack: &'h str,
940	) -> (&'h str, [&'h str; N]) {
941	let mut matched = self.iter().flatten();
942	let whole_match = &haystack[matched.next().expect("a match")];
943	let group_matches = [`0`; N].map(\|_\| {
944	let sp = matched.next().expect("too few matching groups");
945	&haystack[sp]
946	});
947	(whole_match, group_matches)
948	}
949
950	/// This is a convenience routine for extracting the substrings
951	/// corresponding to matching capture groups in the given `haystack`. The
952	/// `haystack` should be the same substring used to find the match spans in
953	/// this `Captures` value.
954	///
955	/// This is identical to [`Captures::extract`], except it works with
956	/// `&[u8]` instead of `&str`.
957	///
958	/// # Panics
959	///
960	/// This panics if the number of explicit matching groups in this
961	/// `Captures` value is less than `N`. This also panics if this `Captures`
962	/// value does not correspond to a match.
963	///
964	/// Note that this does not* panic if the number of explicit matching*
965	/// groups is bigger than `N`. In that case, only the first `N` matching
966	/// groups are extracted.
967	///
968	/// # Example
969	///
970	/// ```
971	/// use regex_automata::nfa::thompson::pikevm::PikeVM;
972	///
973	/// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?;
974	/// let mut cache = re.create_cache();
975	/// let mut caps = re.create_captures();
976	///
977	/// let hay = b"On 2010-03-14, I became a Tenneessee lamb.";
978	/// re.captures(&mut cache, hay, &mut caps);
979	/// assert!(caps.is_match());
980	/// let (full, [year, month, day]) = caps.extract_bytes(hay);
981	/// assert_eq!(b"2010-03-14", full);
982	/// assert_eq!(b"2010", year);
983	/// assert_eq!(b"03", month);
984	/// assert_eq!(b"14", day);
985	///
986	/// // We can also ask for fewer than all capture groups.
987	/// let (full, [year]) = caps.extract_bytes(hay);
988	/// assert_eq!(b"2010-03-14", full);
989	/// assert_eq!(b"2010", year);
990	///
991	/// # Ok::<(), Box<dyn std::error::Error>>(())
992	/// ```
993	pub fn extract_bytes<'h, const N: usize>(
994	&self,
995	haystack: &'h [u8],
996	) -> (&'h [u8], [&'h [u8]; N]) {
997	let mut matched = self.iter().flatten();
998	let whole_match = &haystack[matched.next().expect("a match")];
999	let group_matches = [`0`; N].map(\|_\| {
1000	let sp = matched.next().expect("too few matching groups");
1001	&haystack[sp]
1002	});
1003	(whole_match, group_matches)
1004	}
1005	}
1006
1007	/// Lower level "slot" oriented APIs. One does not typically need to use these
1008	/// when executing a search. They are instead mostly intended for folks that
1009	/// are writing their own regex engine while reusing this `Captures` type.
1010	impl Captures {
1011	/// Clear this `Captures` value.
1012	///
1013	/// After clearing, all slots inside this `Captures` value will be set to
1014	/// `None`. Similarly, any pattern ID that it was previously associated
1015	/// with (for a match) is erased.
1016	///
1017	/// It is not usually necessary to call this routine. Namely, a `Captures`
1018	/// value only provides high level access to the capturing groups of the
1019	/// pattern that matched, and only low level access to individual slots.
1020	/// Thus, even if slots corresponding to groups that aren't associated
1021	/// with the matching pattern are set, then it won't impact the higher
1022	/// level APIs. Namely, higher level APIs like [`Captures::get_group`] will
1023	/// return `None` if no pattern ID is present, even if there are spans set
1024	/// in the underlying slots.
1025	///
1026	/// Thus, to "clear" a `Captures` value of a match, it is usually only
1027	/// necessary to call [`Captures::set_pattern`] with `None`.
1028	///
1029	/// # Example
1030	///
1031	/// This example shows what happens when a `Captures` value is cleared.
1032	///
1033	/// ```
1034	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
1035	/// use regex_automata::nfa::thompson::pikevm::PikeVM;
1036	///
1037	/// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
1038	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1039	///
1040	/// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
1041	/// assert!(caps.is_match());
1042	/// let slots: Vec<Option<usize>> =
1043	/// caps.slots().iter().map(\|s\| s.map(\|x\| x.get())).collect();
1044	/// // Note that the following ordering is considered an API guarantee.
1045	/// assert_eq!(slots, vec![
1046	/// Some(`0`),
1047	/// Some(`17`),
1048	/// Some(`0`),
1049	/// Some(`5`),
1050	/// Some(`6`),
1051	/// Some(`17`),
1052	/// ]);
1053	///
1054	/// // Now clear the slots. Everything is gone and it is no longer a match.
1055	/// caps.clear();
1056	/// assert!(!caps.is_match());
1057	/// let slots: Vec<Option<usize>> =
1058	/// caps.slots().iter().map(\|s\| s.map(\|x\| x.get())).collect();
1059	/// assert_eq!(slots, vec![
1060	/// None,
1061	/// None,
1062	/// None,
1063	/// None,
1064	/// None,
1065	/// None,
1066	/// ]);
1067	///
1068	/// # Ok::<(), Box<dyn std::error::Error>>(())
1069	/// ```
1070	#[inline]
1071	pub fn clear(&mut self) {
1072	self.pid = None;
1073	for slot in self.slots.iter_mut() {
1074	*slot = None;
1075	}
1076	}
1077
1078	/// Set the pattern on this `Captures` value.
1079	///
1080	/// When the pattern ID is `None`, then this `Captures` value does not
1081	/// correspond to a match (`is_match` will return `false`). Otherwise, it
1082	/// corresponds to a match.
1083	///
1084	/// This is useful in search implementations where you might want to
1085	/// initially call `set_pattern(None)` in order to avoid the cost of
1086	/// calling `clear()` if it turns out to not be necessary.
1087	///
1088	/// # Example
1089	///
1090	/// This example shows that `set_pattern` merely overwrites the pattern ID.
1091	/// It does not actually change the underlying slot values.
1092	///
1093	/// ```
1094	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
1095	/// use regex_automata::nfa::thompson::pikevm::PikeVM;
1096	///
1097	/// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
1098	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1099	///
1100	/// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
1101	/// assert!(caps.is_match());
1102	/// assert!(caps.pattern().is_some());
1103	/// let slots: Vec<Option<usize>> =
1104	/// caps.slots().iter().map(\|s\| s.map(\|x\| x.get())).collect();
1105	/// // Note that the following ordering is considered an API guarantee.
1106	/// assert_eq!(slots, vec![
1107	/// Some(`0`),
1108	/// Some(`17`),
1109	/// Some(`0`),
1110	/// Some(`5`),
1111	/// Some(`6`),
1112	/// Some(`17`),
1113	/// ]);
1114	///
1115	/// // Now set the pattern to None. Note that the slot values remain.
1116	/// caps.set_pattern(None);
1117	/// assert!(!caps.is_match());
1118	/// assert!(!caps.pattern().is_some());
1119	/// let slots: Vec<Option<usize>> =
1120	/// caps.slots().iter().map(\|s\| s.map(\|x\| x.get())).collect();
1121	/// // Note that the following ordering is considered an API guarantee.
1122	/// assert_eq!(slots, vec![
1123	/// Some(`0`),
1124	/// Some(`17`),
1125	/// Some(`0`),
1126	/// Some(`5`),
1127	/// Some(`6`),
1128	/// Some(`17`),
1129	/// ]);
1130	///
1131	/// # Ok::<(), Box<dyn std::error::Error>>(())
1132	/// ```
1133	#[inline]
1134	pub fn set_pattern(&mut self, pid: Option<PatternID>) {
1135	self.pid = pid;
1136	}
1137
1138	/// Returns the underlying slots, where each slot stores a single offset.
1139	///
1140	/// Every matching capturing group generally corresponds to two slots: one
1141	/// slot for the starting position and another for the ending position.
1142	/// Typically, either both are present or neither are. (The weasel word
1143	/// "typically" is used here because it really depends on the regex engine
1144	/// implementation. Every sensible regex engine likely adheres to this
1145	/// invariant, and every regex engine in this crate is sensible.)
1146	///
1147	/// Generally speaking, callers should prefer to use higher level routines
1148	/// like [`Captures::get_match`] or [`Captures::get_group`].
1149	///
1150	/// An important note here is that a regex engine may not reset all of the
1151	/// slots to `None` values when no match occurs, or even when a match of
1152	/// a different pattern occurs. But this depends on how the regex engine
1153	/// implementation deals with slots.
1154	///
1155	/// # Example
1156	///
1157	/// This example shows how to get the underlying slots from a regex match.
1158	///
1159	/// ```
1160	/// use regex_automata::{
1161	/// nfa::thompson::pikevm::PikeVM,
1162	/// util::primitives::{PatternID, NonMaxUsize},
1163	/// };
1164	///
1165	/// let re = PikeVM::new_many(&[
1166	/// r"[a-z]+",
1167	/// r"[0-9]+",
1168	/// ])?;
1169	/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1170	///
1171	/// re.captures(&mut cache, "123", &mut caps);
1172	/// assert_eq!(Some(PatternID::must(`1`)), caps.pattern());
1173	/// // Note that the only guarantee we have here is that slots 2 and 3
1174	/// // are set to correct values. The contents of the first two slots are
1175	/// // unspecified since the 0th pattern did not match.
1176	/// let expected = &[
1177	/// None,
1178	/// None,
1179	/// NonMaxUsize::new(`0`),
1180	/// NonMaxUsize::new(`3`),
1181	/// ];
1182	/// assert_eq!(expected, caps.slots());
1183	///
1184	/// # Ok::<(), Box<dyn std::error::Error>>(())
1185	/// ```
1186	#[inline]
1187	pub fn slots(&self) -> &[Option<NonMaxUsize>] {
1188	&self.slots
1189	}
1190
1191	/// Returns the underlying slots as a mutable slice, where each slot stores
1192	/// a single offset.
1193	///
1194	/// This tends to be most useful for regex engine implementations for
1195	/// writing offsets for matching capturing groups to slots.
1196	///
1197	/// See [`Captures::slots`] for more information about slots.
1198	#[inline]
1199	pub fn slots_mut(&mut self) -> &mut [Option<NonMaxUsize>] {
1200	&mut self.slots
1201	}
1202	}
1203
1204	impl core::fmt::Debug for Captures {
1205	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1206	let mut dstruct: DebugStruct<'_, '_> = f.debug_struct(name:"Captures");
1207	dstruct.field(name:"pid", &self.pid);
1208	if let Some(pid: PatternID) = self.pid {
1209	dstruct.field(name:"spans", &CapturesDebugMap { pid, caps: self });
1210	}
1211	dstruct.finish()
1212	}
1213	}
1214
1215	/// A little helper type to provide a nice map-like debug representation for
1216	/// our capturing group spans.
1217	struct CapturesDebugMap<'a> {
1218	pid: PatternID,
1219	caps: &'a Captures,
1220	}
1221
1222	impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
1223	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1224	struct Key<'a>(usize, Option<&'a str>);
1225
1226	impl<'a> core::fmt::Debug for Key<'a> {
1227	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1228	write!(f, "{}", self.0)?;
1229	if let Some(name) = self.1 {
1230	write!(f, "/{:?}", name)?;
1231	}
1232	Ok(())
1233	}
1234	}
1235
1236	let mut map = f.debug_map();
1237	let names = self.caps.group_info().pattern_names(self.pid);
1238	for (group_index, maybe_name) in names.enumerate() {
1239	let key = Key(group_index, maybe_name);
1240	match self.caps.get_group(group_index) {
1241	None => map.entry(&key, &None::<()>),
1242	Some(span) => map.entry(&key, &span),
1243	};
1244	}
1245	map.finish()
1246	}
1247	}
1248
1249	/// An iterator over all capturing groups in a `Captures` value.
1250	///
1251	/// This iterator includes capturing groups that did not participate in a
1252	/// match. See the [`Captures::iter`] method documentation for more details
1253	/// and examples.
1254	///
1255	/// The lifetime parameter `'a` refers to the lifetime of the underlying
1256	/// `Captures` value.
1257	#[derive(Clone, Debug)]
1258	pub struct CapturesPatternIter<'a> {
1259	caps: &'a Captures,
1260	names: core::iter::Enumerate<GroupInfoPatternNames<'a>>,
1261	}
1262
1263	impl<'a> Iterator for CapturesPatternIter<'a> {
1264	type Item = Option<Span>;
1265
1266	fn next(&mut self) -> Option<Option<Span>> {
1267	let (group_index: usize, _) = self.names.next()?;
1268	Some(self.caps.get_group(group_index))
1269	}
1270
1271	fn size_hint(&self) -> (usize, Option<usize>) {
1272	self.names.size_hint()
1273	}
1274
1275	fn count(self) -> usize {
1276	self.names.count()
1277	}
1278	}
1279
1280	impl<'a> ExactSizeIterator for CapturesPatternIter<'a> {}
1281	impl<'a> core::iter::FusedIterator for CapturesPatternIter<'a> {}
1282
1283	/// Represents information about capturing groups in a compiled regex.
1284	///
1285	/// The information encapsulated by this type consists of the following. For
1286	/// each pattern:
1287	///
1288	/// A map from every capture group name to its corresponding capture group*
1289	/// index.
1290	/// A map from every capture group index to its corresponding capture group*
1291	/// name.
1292	/// A map from capture group index to its corresponding slot index. A slot*
1293	/// refers to one half of a capturing group. That is, a capture slot is either
1294	/// the start or end of a capturing group. A slot is usually the mechanism
1295	/// by which a regex engine records offsets for each capturing group during a
1296	/// search.
1297	///
1298	/// A `GroupInfo` uses reference counting internally and is thus cheap to
1299	/// clone.
1300	///
1301	/// # Mapping from capture groups to slots
1302	///
1303	/// One of the main responsibilities of a `GroupInfo` is to build a mapping
1304	/// from `(PatternID, u32)` (where the `u32` is a capture index) to something
1305	/// called a "slot." As mentioned above, a slot refers to one half of a
1306	/// capturing group. Both combined provide the start and end offsets of
1307	/// a capturing group that participated in a match.
1308	///
1309	/// The mapping between group indices and slots is an API guarantee.* That*
1310	/// is, the mapping won't change within a semver compatible release.
1311	///
1312	/// Slots exist primarily because this is a convenient mechanism by which
1313	/// regex engines report group offsets at search time. For example, the
1314	/// [`nfa::thompson::State::Capture`](crate::nfa::thompson::State::Capture)
1315	/// NFA state includes the slot index. When a regex engine transitions through
1316	/// this state, it will likely use the slot index to write the current haystack
1317	/// offset to some region of memory. When a match is found, those slots are
1318	/// then reported to the caller, typically via a convenient abstraction like a
1319	/// [`Captures`] value.
1320	///
1321	/// Because this crate provides first class support for multi-pattern regexes,
1322	/// and because of some performance related reasons, the mapping between
1323	/// capturing groups and slots is a little complex. However, in the case of a
1324	/// single pattern, the mapping can be described very simply: for all capture
1325	/// group indices `i`, its corresponding slots are at `i 2` and `i * 2 + 1`.*
1326	/// Notice that the pattern ID isn't involved at all here, because it only
1327	/// applies to a single-pattern regex, it is therefore always `0`.
1328	///
1329	/// In the multi-pattern case, the mapping is a bit more complicated. To talk
1330	/// about it, we must define what we mean by "implicit" vs "explicit"
1331	/// capturing groups:
1332	///
1333	/// An implicit capturing group refers to the capturing group that is*
1334	/// present for every pattern automatically, and corresponds to the overall
1335	/// match of a pattern. Every pattern has precisely one implicit capturing
1336	/// group. It is always unnamed and it always corresponds to the capture group
1337	/// index `0`.
1338	/// An explicit capturing group refers to any capturing group that*
1339	/// appears in the concrete syntax of the pattern. (Or, if an NFA was hand
1340	/// built without any concrete syntax, it refers to any capturing group with an
1341	/// index greater than `0`.)
1342	///
1343	/// Some examples:
1344	///
1345	/// `\w+` has one implicit capturing group and zero explicit capturing*
1346	/// groups.
1347	/// `(\w+)` has one implicit group and one explicit group.*
1348	/// `foo(\d+)(?:\pL+)(\d+)` has one implicit group and two explicit groups.*
1349	///
1350	/// Turning back to the slot mapping, we can now state it as follows:
1351	///
1352	/// Given a pattern ID `pid`, the slots for its implicit group are always*
1353	/// at `pid 2` and `pid * 2 + 1`.*
1354	/// Given a pattern ID `0`, the slots for its explicit groups start*
1355	/// at `group_info.pattern_len() 2`.*
1356	/// Given a pattern ID `pid > 0`, the slots for its explicit groups start*
1357	/// immediately following where the slots for the explicit groups of `pid - 1`
1358	/// end.
1359	///
1360	/// In particular, while there is a concrete formula one can use to determine
1361	/// where the slots for the implicit group of any pattern are, there is no
1362	/// general formula for determining where the slots for explicit capturing
1363	/// groups are. This is because each pattern can contain a different number
1364	/// of groups.
1365	///
1366	/// The intended way of getting the slots for a particular capturing group
1367	/// (whether implicit or explicit) is via the [`GroupInfo::slot`] or
1368	/// [`GroupInfo::slots`] method.
1369	///
1370	/// See below for a concrete example of how capturing groups get mapped to
1371	/// slots.
1372	///
1373	/// # Example
1374	///
1375	/// This example shows how to build a new `GroupInfo` and query it for
1376	/// information.
1377	///
1378	/// ```
1379	/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
1380	///
1381	/// let info = GroupInfo::new(vec![
1382	/// vec![None, Some("foo")],
1383	/// vec![None],
1384	/// vec![None, None, None, Some("bar"), None],
1385	/// vec![None, None, Some("foo")],
1386	/// ])?;
1387	/// // The number of patterns being tracked.
1388	/// assert_eq!(`4`, info.pattern_len());
1389	/// // We can query the number of groups for any pattern.
1390	/// assert_eq!(`2`, info.group_len(PatternID::must(`0`)));
1391	/// assert_eq!(`1`, info.group_len(PatternID::must(`1`)));
1392	/// assert_eq!(`5`, info.group_len(PatternID::must(`2`)));
1393	/// assert_eq!(`3`, info.group_len(PatternID::must(`3`)));
1394	/// // An invalid pattern always has zero groups.
1395	/// assert_eq!(`0`, info.group_len(PatternID::must(`999`)));
1396	/// // 2 slots per group
1397	/// assert_eq!(`22`, info.slot_len());
1398	///
1399	/// // We can map a group index for a particular pattern to its name, if
1400	/// // one exists.
1401	/// assert_eq!(Some("foo"), info.to_name(PatternID::must(`3`), `2`));
1402	/// assert_eq!(None, info.to_name(PatternID::must(`2`), `4`));
1403	/// // Or map a name to its group index.
1404	/// assert_eq!(Some(`1`), info.to_index(PatternID::must(`0`), "foo"));
1405	/// assert_eq!(Some(`2`), info.to_index(PatternID::must(`3`), "foo"));
1406	///
1407	/// # Ok::<(), Box<dyn std::error::Error>>(())
1408	/// ```
1409	///
1410	/// # Example: mapping from capture groups to slots
1411	///
1412	/// This example shows the specific mapping from capture group indices for
1413	/// each pattern to their corresponding slots. The slot values shown in this
1414	/// example are considered an API guarantee.
1415	///
1416	/// ```
1417	/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
1418	///
1419	/// let info = GroupInfo::new(vec![
1420	/// vec![None, Some("foo")],
1421	/// vec![None],
1422	/// vec![None, None, None, Some("bar"), None],
1423	/// vec![None, None, Some("foo")],
1424	/// ])?;
1425	///
1426	/// // We first show the slots for each pattern's implicit group.
1427	/// assert_eq!(Some((`0`, `1`)), info.slots(PatternID::must(`0`), `0`));
1428	/// assert_eq!(Some((`2`, `3`)), info.slots(PatternID::must(`1`), `0`));
1429	/// assert_eq!(Some((`4`, `5`)), info.slots(PatternID::must(`2`), `0`));
1430	/// assert_eq!(Some((`6`, `7`)), info.slots(PatternID::must(`3`), `0`));
1431	///
1432	/// // And now we show the slots for each pattern's explicit group.
1433	/// assert_eq!(Some((`8`, `9`)), info.slots(PatternID::must(`0`), `1`));
1434	/// assert_eq!(Some((`10`, `11`)), info.slots(PatternID::must(`2`), `1`));
1435	/// assert_eq!(Some((`12`, `13`)), info.slots(PatternID::must(`2`), `2`));
1436	/// assert_eq!(Some((`14`, `15`)), info.slots(PatternID::must(`2`), `3`));
1437	/// assert_eq!(Some((`16`, `17`)), info.slots(PatternID::must(`2`), `4`));
1438	/// assert_eq!(Some((`18`, `19`)), info.slots(PatternID::must(`3`), `1`));
1439	/// assert_eq!(Some((`20`, `21`)), info.slots(PatternID::must(`3`), `2`));
1440	///
1441	/// // Asking for the slots for an invalid pattern ID or even for an invalid
1442	/// // group index for a specific pattern will return None. So for example,
1443	/// // you're guaranteed to not get the slots for a different pattern than the
1444	/// // one requested.
1445	/// assert_eq!(None, info.slots(PatternID::must(`5`), `0`));
1446	/// assert_eq!(None, info.slots(PatternID::must(`1`), `1`));
1447	///
1448	/// # Ok::<(), Box<dyn std::error::Error>>(())
1449	/// ```
1450	#[derive(Clone, Debug, Default)]
1451	pub struct GroupInfo(Arc<GroupInfoInner>);
1452
1453	impl GroupInfo {
1454	/// Creates a new group info from a sequence of patterns, where each
1455	/// sequence of patterns yields a sequence of possible group names. The
1456	/// index of each pattern in the sequence corresponds to its `PatternID`,
1457	/// and the index of each group in each pattern's sequence corresponds to
1458	/// its corresponding group index.
1459	///
1460	/// While this constructor is very generic and therefore perhaps hard to
1461	/// chew on, an example of a valid concrete type that can be passed to
1462	/// this constructor is `Vec<Vec<Option<String>>>`. The outer `Vec`
1463	/// corresponds to the patterns, i.e., one `Vec<Option<String>>` per
1464	/// pattern. The inner `Vec` corresponds to the capturing groups for
1465	/// each pattern. The `Option<String>` corresponds to the name of the
1466	/// capturing group, if present.
1467	///
1468	/// It is legal to pass an empty iterator to this constructor. It will
1469	/// return an empty group info with zero slots. An empty group info is
1470	/// useful for cases where you have no patterns or for cases where slots
1471	/// aren't being used at all (e.g., for most DFAs in this crate).
1472	///
1473	/// # Errors
1474	///
1475	/// This constructor returns an error if the given capturing groups are
1476	/// invalid in some way. Those reasons include, but are not necessarily
1477	/// limited to:
1478	///
1479	/// Too many patterns (i.e., `PatternID` would overflow).*
1480	/// Too many capturing groups (e.g., `u32` would overflow).*
1481	/// A pattern is given that has no capturing groups. (All patterns must*
1482	/// have at least an implicit capturing group at index `0`.)
1483	/// The capturing group at index `0` has a name. It must be unnamed.*
1484	/// There are duplicate capturing group names within the same pattern.*
1485	/// (Multiple capturing groups with the same name may exist, but they
1486	/// must be in different patterns.)
1487	///
1488	/// An example below shows how to trigger some of the above error
1489	/// conditions.
1490	///
1491	/// # Example
1492	///
1493	/// This example shows how to build a new `GroupInfo` and query it for
1494	/// information.
1495	///
1496	/// ```
1497	/// use regex_automata::util::captures::GroupInfo;
1498	///
1499	/// let info = GroupInfo::new(vec![
1500	/// vec![None, Some("foo")],
1501	/// vec![None],
1502	/// vec![None, None, None, Some("bar"), None],
1503	/// vec![None, None, Some("foo")],
1504	/// ])?;
1505	/// // The number of patterns being tracked.
1506	/// assert_eq!(`4`, info.pattern_len());
1507	/// // 2 slots per group
1508	/// assert_eq!(`22`, info.slot_len());
1509	///
1510	/// # Ok::<(), Box<dyn std::error::Error>>(())
1511	/// ```
1512	///
1513	/// # Example: empty `GroupInfo`
1514	///
1515	/// This example shows how to build a new `GroupInfo` and query it for
1516	/// information.
1517	///
1518	/// ```
1519	/// use regex_automata::util::captures::GroupInfo;
1520	///
1521	/// let info = GroupInfo::empty();
1522	/// // Everything is zero.
1523	/// assert_eq!(`0`, info.pattern_len());
1524	/// assert_eq!(`0`, info.slot_len());
1525	///
1526	/// # Ok::<(), Box<dyn std::error::Error>>(())
1527	/// ```
1528	///
1529	/// # Example: error conditions
1530	///
1531	/// This example shows how to provoke some of the ways in which building
1532	/// a `GroupInfo` can fail.
1533	///
1534	/// ```
1535	/// use regex_automata::util::captures::GroupInfo;
1536	///
1537	/// // Either the group info is empty, or all patterns must have at least
1538	/// // one capturing group.
1539	/// assert!(GroupInfo::new(vec![
1540	/// vec![None, Some("a")], // ok
1541	/// vec![None], // ok
1542	/// vec![], // not ok
1543	/// ]).is_err());
1544	/// // Note that building an empty group info is OK.
1545	/// assert!(GroupInfo::new(Vec::<Vec<Option<String>>>::new()).is_ok());
1546	///
1547	/// // The first group in each pattern must correspond to an implicit
1548	/// // anonymous group. i.e., One that is not named. By convention, this
1549	/// // group corresponds to the overall match of a regex. Every other group
1550	/// // in a pattern is explicit and optional.
1551	/// assert!(GroupInfo::new(vec![vec![Some("foo")]]).is_err());
1552	///
1553	/// // There must not be duplicate group names within the same pattern.
1554	/// assert!(GroupInfo::new(vec![
1555	/// vec![None, Some("foo"), Some("foo")],
1556	/// ]).is_err());
1557	/// // But duplicate names across distinct patterns is OK.
1558	/// assert!(GroupInfo::new(vec![
1559	/// vec![None, Some("foo")],
1560	/// vec![None, Some("foo")],
1561	/// ]).is_ok());
1562	///
1563	/// # Ok::<(), Box<dyn std::error::Error>>(())
1564	/// ```
1565	///
1566	/// There are other ways for building a `GroupInfo` to fail but are
1567	/// difficult to show. For example, if the number of patterns given would
1568	/// overflow `PatternID`.
1569	pub fn new<P, G, N>(pattern_groups: P) -> Result<GroupInfo, GroupInfoError>
1570	where
1571	P: IntoIterator<Item = G>,
1572	G: IntoIterator<Item = Option<N>>,
1573	N: AsRef<str>,
1574	{
1575	let mut group_info = GroupInfoInner {
1576	slot_ranges: vec![],
1577	name_to_index: vec![],
1578	index_to_name: vec![],
1579	memory_extra: `0`,
1580	};
1581	for (pattern_index, groups) in pattern_groups.into_iter().enumerate() {
1582	// If we can't convert the pattern index to an ID, then the caller
1583	// tried to build capture info for too many patterns.
1584	let pid = PatternID::new(pattern_index)
1585	.map_err(GroupInfoError::too_many_patterns)?;
1586
1587	let mut groups_iter = groups.into_iter().enumerate();
1588	match groups_iter.next() {
1589	None => return Err(GroupInfoError::missing_groups(pid)),
1590	Some((_, Some(_))) => {
1591	return Err(GroupInfoError::first_must_be_unnamed(pid))
1592	}
1593	Some((_, None)) => {}
1594	}
1595	group_info.add_first_group(pid);
1596	// Now iterate over the rest, which correspond to all of the
1597	// (conventionally) explicit capture groups in a regex pattern.
1598	for (group_index, maybe_name) in groups_iter {
1599	// Just like for patterns, if the group index can't be
1600	// converted to a "small" index, then the caller has given too
1601	// many groups for a particular pattern.
1602	let group = SmallIndex::new(group_index).map_err(\|_\| {
1603	GroupInfoError::too_many_groups(pid, group_index)
1604	})?;
1605	group_info.add_explicit_group(pid, group, maybe_name)?;
1606	}
1607	}
1608	group_info.fixup_slot_ranges()?;
1609	Ok(GroupInfo(Arc::new(group_info)))
1610	}
1611
1612	/// This creates an empty `GroupInfo`.
1613	///
1614	/// This is a convenience routine for calling `GroupInfo::new` with an
1615	/// iterator that yields no elements.
1616	///
1617	/// # Example
1618	///
1619	/// This example shows how to build a new empty `GroupInfo` and query it
1620	/// for information.
1621	///
1622	/// ```
1623	/// use regex_automata::util::captures::GroupInfo;
1624	///
1625	/// let info = GroupInfo::empty();
1626	/// // Everything is zero.
1627	/// assert_eq!(`0`, info.pattern_len());
1628	/// assert_eq!(`0`, info.all_group_len());
1629	/// assert_eq!(`0`, info.slot_len());
1630	///
1631	/// # Ok::<(), Box<dyn std::error::Error>>(())
1632	/// ```
1633	pub fn empty() -> GroupInfo {
1634	GroupInfo::new(core::iter::empty::<[Option<&str>; `0`]>())
1635	.expect("empty group info is always valid")
1636	}
1637
1638	/// Return the capture group index corresponding to the given name in the
1639	/// given pattern. If no such capture group name exists in the given
1640	/// pattern, then this returns `None`.
1641	///
1642	/// If the given pattern ID is invalid, then this returns `None`.
1643	///
1644	/// This also returns `None` for all inputs if these captures are empty
1645	/// (e.g., built from an empty [`GroupInfo`]). To check whether captures
1646	/// are are present for a specific pattern, use [`GroupInfo::group_len`].
1647	///
1648	/// # Example
1649	///
1650	/// This example shows how to find the capture index for the given pattern
1651	/// and group name.
1652	///
1653	/// Remember that capture indices are relative to the pattern, such that
1654	/// the same capture index value may refer to different capturing groups
1655	/// for distinct patterns.
1656	///
1657	/// ```
1658	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
1659	/// use regex_automata::{nfa::thompson::NFA, PatternID};
1660	///
1661	/// let (pid0, pid1) = (PatternID::must(`0`), PatternID::must(`1`));
1662	///
1663	/// let nfa = NFA::new_many(&[
1664	/// r"a(?P<quux>\w+)z(?P<foo>\s+)",
1665	/// r"a(?P<foo>\d+)z",
1666	/// ])?;
1667	/// let groups = nfa.group_info();
1668	/// assert_eq!(Some(`2`), groups.to_index(pid0, "foo"));
1669	/// // Recall that capture index 0 is always unnamed and refers to the
1670	/// // entire pattern. So the first capturing group present in the pattern
1671	/// // itself always starts at index 1.
1672	/// assert_eq!(Some(`1`), groups.to_index(pid1, "foo"));
1673	///
1674	/// // And if a name does not exist for a particular pattern, None is
1675	/// // returned.
1676	/// assert!(groups.to_index(pid0, "quux").is_some());
1677	/// assert!(groups.to_index(pid1, "quux").is_none());
1678	///
1679	/// # Ok::<(), Box<dyn std::error::Error>>(())
1680	/// ```
1681	#[inline]
1682	pub fn to_index(&self, pid: PatternID, name: &str) -> Option<usize> {
1683	let indices = self.0.name_to_index.get(pid.as_usize())?;
1684	indices.get(name).cloned().map(\|i\| i.as_usize())
1685	}
1686
1687	/// Return the capture name for the given index and given pattern. If the
1688	/// corresponding group does not have a name, then this returns `None`.
1689	///
1690	/// If the pattern ID is invalid, then this returns `None`.
1691	///
1692	/// If the group index is invalid for the given pattern, then this returns
1693	/// `None`. A group `index` is valid for a pattern `pid` in an `nfa` if and
1694	/// only if `index < nfa.pattern_capture_len(pid)`.
1695	///
1696	/// This also returns `None` for all inputs if these captures are empty
1697	/// (e.g., built from an empty [`GroupInfo`]). To check whether captures
1698	/// are are present for a specific pattern, use [`GroupInfo::group_len`].
1699	///
1700	/// # Example
1701	///
1702	/// This example shows how to find the capture group name for the given
1703	/// pattern and group index.
1704	///
1705	/// ```
1706	/// # if cfg!(miri) { return Ok(()); } // miri takes too long
1707	/// use regex_automata::{nfa::thompson::NFA, PatternID};
1708	///
1709	/// let (pid0, pid1) = (PatternID::must(`0`), PatternID::must(`1`));
1710	///
1711	/// let nfa = NFA::new_many(&[
1712	/// r"a(?P<foo>\w+)z(\s+)x(\d+)",
1713	/// r"a(\d+)z(?P<foo>\s+)",
1714	/// ])?;
1715	/// let groups = nfa.group_info();
1716	/// assert_eq!(None, groups.to_name(pid0, `0`));
1717	/// assert_eq!(Some("foo"), groups.to_name(pid0, `1`));
1718	/// assert_eq!(None, groups.to_name(pid0, `2`));
1719	/// assert_eq!(None, groups.to_name(pid0, `3`));
1720	///
1721	/// assert_eq!(None, groups.to_name(pid1, `0`));
1722	/// assert_eq!(None, groups.to_name(pid1, `1`));
1723	/// assert_eq!(Some("foo"), groups.to_name(pid1, `2`));
1724	/// // '3' is not a valid capture index for the second pattern.
1725	/// assert_eq!(None, groups.to_name(pid1, `3`));
1726	///
1727	/// # Ok::<(), Box<dyn std::error::Error>>(())
1728	/// ```
1729	#[inline]
1730	pub fn to_name(&self, pid: PatternID, group_index: usize) -> Option<&str> {
1731	let pattern_names = self.0.index_to_name.get(pid.as_usize())?;
1732	pattern_names.get(group_index)?.as_deref()
1733	}
1734
1735	/// Return an iterator of all capture groups and their names (if present)
1736	/// for a particular pattern.
1737	///
1738	/// If the given pattern ID is invalid or if this `GroupInfo` is empty,
1739	/// then the iterator yields no elements.
1740	///
1741	/// The number of elements yielded by this iterator is always equal to
1742	/// the result of calling [`GroupInfo::group_len`] with the same
1743	/// `PatternID`.
1744	///
1745	/// # Example
1746	///
1747	/// This example shows how to get a list of all capture group names for
1748	/// a particular pattern.
1749	///
1750	/// ```
1751	/// use regex_automata::{nfa::thompson::NFA, PatternID};
1752	///
1753	/// let nfa = NFA::new(r"(a)(?P<foo>b)(c)(d)(?P<bar>e)")?;
1754	/// // The first is the implicit group that is always unnammed. The next
1755	/// // 5 groups are the explicit groups found in the concrete syntax above.
1756	/// let expected = vec![None, None, Some("foo"), None, None, Some("bar")];
1757	/// let got: Vec<Option<&str>> =
1758	/// nfa.group_info().pattern_names(PatternID::ZERO).collect();
1759	/// assert_eq!(expected, got);
1760	///
1761	/// // Using an invalid pattern ID will result in nothing yielded.
1762	/// let got = nfa.group_info().pattern_names(PatternID::must(`999`)).count();
1763	/// assert_eq!(`0`, got);
1764	///
1765	/// # Ok::<(), Box<dyn std::error::Error>>(())
1766	/// ```
1767	#[inline]
1768	pub fn pattern_names(&self, pid: PatternID) -> GroupInfoPatternNames<'_> {
1769	GroupInfoPatternNames {
1770	it: self
1771	.0
1772	.index_to_name
1773	.get(pid.as_usize())
1774	.map(\|indices\| indices.iter())
1775	.unwrap_or([].iter()),
1776	}
1777	}
1778
1779	/// Return an iterator of all capture groups for all patterns supported by
1780	/// this `GroupInfo`. Each item yielded is a triple of the group's pattern
1781	/// ID, index in the pattern and the group's name, if present.
1782	///
1783	/// # Example
1784	///
1785	/// This example shows how to get a list of all capture groups found in
1786	/// one NFA, potentially spanning multiple patterns.
1787	///
1788	/// ```
1789	/// use regex_automata::{nfa::thompson::NFA, PatternID};
1790	///
1791	/// let nfa = NFA::new_many(&[
1792	/// r"(?P<foo>a)",
1793	/// r"a",
1794	/// r"(a)",
1795	/// ])?;
1796	/// let expected = vec![
1797	/// (PatternID::must(`0`), `0`, None),
1798	/// (PatternID::must(`0`), `1`, Some("foo")),
1799	/// (PatternID::must(`1`), `0`, None),
1800	/// (PatternID::must(`2`), `0`, None),
1801	/// (PatternID::must(`2`), `1`, None),
1802	/// ];
1803	/// let got: Vec<(PatternID, usize, Option<&str>)> =
1804	/// nfa.group_info().all_names().collect();
1805	/// assert_eq!(expected, got);
1806	///
1807	/// # Ok::<(), Box<dyn std::error::Error>>(())
1808	/// ```
1809	///
1810	/// Unlike other capturing group related routines, this routine doesn't
1811	/// panic even if captures aren't enabled on this NFA:
1812	///
1813	/// ```
1814	/// use regex_automata::nfa::thompson::{NFA, WhichCaptures};
1815	///
1816	/// let nfa = NFA::compiler()
1817	/// .configure(NFA::config().which_captures(WhichCaptures::None))
1818	/// .build_many(&[
1819	/// r"(?P<foo>a)",
1820	/// r"a",
1821	/// r"(a)",
1822	/// ])?;
1823	/// // When captures aren't enabled, there's nothing to return.
1824	/// assert_eq!(`0`, nfa.group_info().all_names().count());
1825	///
1826	/// # Ok::<(), Box<dyn std::error::Error>>(())
1827	/// ```
1828	#[inline]
1829	pub fn all_names(&self) -> GroupInfoAllNames<'_> {
1830	GroupInfoAllNames {
1831	group_info: self,
1832	pids: PatternID::iter(self.pattern_len()),
1833	current_pid: None,
1834	names: None,
1835	}
1836	}
1837
1838	/// Returns the starting and ending slot corresponding to the given
1839	/// capturing group for the given pattern. The ending slot is always one
1840	/// more than the starting slot returned.
1841	///
1842	/// Note that this is like [`GroupInfo::slot`], except that it also returns
1843	/// the ending slot value for convenience.
1844	///
1845	/// If either the pattern ID or the capture index is invalid, then this
1846	/// returns None.
1847	///
1848	/// # Example
1849	///
1850	/// This example shows that the starting slots for the first capturing
1851	/// group of each pattern are distinct.
1852	///
1853	/// ```
1854	/// use regex_automata::{nfa::thompson::NFA, PatternID};
1855	///
1856	/// let nfa = NFA::new_many(&["a", "b"])?;
1857	/// assert_ne!(
1858	/// nfa.group_info().slots(PatternID::must(`0`), `0`),
1859	/// nfa.group_info().slots(PatternID::must(`1`), `0`),
1860	/// );
1861	///
1862	/// // Also, the start and end slot values are never equivalent.
1863	/// let (start, end) = nfa.group_info().slots(PatternID::ZERO, `0`).unwrap();
1864	/// assert_ne!(start, end);
1865	///
1866	/// # Ok::<(), Box<dyn std::error::Error>>(())
1867	/// ```
1868	#[inline]
1869	pub fn slots(
1870	&self,
1871	pid: PatternID,
1872	group_index: usize,
1873	) -> Option<(usize, usize)> {
1874	// Since 'slot' only even returns valid starting slots, we know that
1875	// there must also be an end slot and that end slot is always one more
1876	// than the start slot.
1877	self.slot(pid, group_index).map(\|start\| (start, start + `1`))
1878	}
1879
1880	/// Returns the starting slot corresponding to the given capturing group
1881	/// for the given pattern. The ending slot is always one more than the
1882	/// value returned.
1883	///
1884	/// If either the pattern ID or the capture index is invalid, then this
1885	/// returns None.
1886	///
1887	/// # Example
1888	///
1889	/// This example shows that the starting slots for the first capturing
1890	/// group of each pattern are distinct.
1891	///
1892	/// ```
1893	/// use regex_automata::{nfa::thompson::NFA, PatternID};
1894	///
1895	/// let nfa = NFA::new_many(&["a", "b"])?;
1896	/// assert_ne!(
1897	/// nfa.group_info().slot(PatternID::must(`0`), `0`),
1898	/// nfa.group_info().slot(PatternID::must(`1`), `0`),
1899	/// );
1900	///
1901	/// # Ok::<(), Box<dyn std::error::Error>>(())
1902	/// ```
1903	#[inline]
1904	pub fn slot(&self, pid: PatternID, group_index: usize) -> Option<usize> {
1905	if group_index >= self.group_len(pid) {
1906	return None;
1907	}
1908	// At this point, we know that 'pid' refers to a real pattern and that
1909	// 'group_index' refers to a real group. We therefore also know that
1910	// the pattern and group can be combined to return a correct slot.
1911	// That's why we don't need to use checked arithmetic below.
1912	if group_index == `0` {
1913	Some(pid.as_usize() * `2`)
1914	} else {
1915	// As above, we don't need to check that our slot is less than the
1916	// end of our range since we already know the group index is a
1917	// valid index for the given pattern.
1918	let (start, _) = self.0.slot_ranges[pid];
1919	Some(start.as_usize() + ((group_index - `1`) * `2`))
1920	}
1921	}
1922
1923	/// Returns the total number of patterns in this `GroupInfo`.
1924	///
1925	/// This may return zero if the `GroupInfo` was constructed with no
1926	/// patterns.
1927	///
1928	/// This is guaranteed to be no bigger than [`PatternID::LIMIT`] because
1929	/// `GroupInfo` construction will fail if too many patterns are added.
1930	///
1931	/// # Example
1932	///
1933	/// ```
1934	/// use regex_automata::nfa::thompson::NFA;
1935	///
1936	/// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
1937	/// assert_eq!(`3`, nfa.group_info().pattern_len());
1938	///
1939	/// let nfa = NFA::never_match();
1940	/// assert_eq!(`0`, nfa.group_info().pattern_len());
1941	///
1942	/// let nfa = NFA::always_match();
1943	/// assert_eq!(`1`, nfa.group_info().pattern_len());
1944	///
1945	/// # Ok::<(), Box<dyn std::error::Error>>(())
1946	/// ```
1947	#[inline]
1948	pub fn pattern_len(&self) -> usize {
1949	self.0.pattern_len()
1950	}
1951
1952	/// Return the number of capture groups in a pattern.
1953	///
1954	/// If the pattern ID is invalid, then this returns `0`.
1955	///
1956	/// # Example
1957	///
1958	/// This example shows how the values returned by this routine may vary
1959	/// for different patterns and NFA configurations.
1960	///
1961	/// ```
1962	/// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID};
1963	///
1964	/// let nfa = NFA::new(r"(a)(b)(c)")?;
1965	/// // There are 3 explicit groups in the pattern's concrete syntax and
1966	/// // 1 unnamed and implicit group spanning the entire pattern.
1967	/// assert_eq!(`4`, nfa.group_info().group_len(PatternID::ZERO));
1968	///
1969	/// let nfa = NFA::new(r"abc")?;
1970	/// // There is just the unnamed implicit group.
1971	/// assert_eq!(`1`, nfa.group_info().group_len(PatternID::ZERO));
1972	///
1973	/// let nfa = NFA::compiler()
1974	/// .configure(NFA::config().which_captures(WhichCaptures::None))
1975	/// .build(r"abc")?;
1976	/// // We disabled capturing groups, so there are none.
1977	/// assert_eq!(`0`, nfa.group_info().group_len(PatternID::ZERO));
1978	///
1979	/// let nfa = NFA::compiler()
1980	/// .configure(NFA::config().which_captures(WhichCaptures::None))
1981	/// .build(r"(a)(b)(c)")?;
1982	/// // We disabled capturing groups, so there are none, even if there are
1983	/// // explicit groups in the concrete syntax.
1984	/// assert_eq!(`0`, nfa.group_info().group_len(PatternID::ZERO));
1985	///
1986	/// # Ok::<(), Box<dyn std::error::Error>>(())
1987	/// ```
1988	#[inline]
1989	pub fn group_len(&self, pid: PatternID) -> usize {
1990	self.0.group_len(pid)
1991	}
1992
1993	/// Return the total number of capture groups across all patterns.
1994	///
1995	/// This includes implicit groups that represent the entire match of a
1996	/// pattern.
1997	///
1998	/// # Example
1999	///
2000	/// This example shows how the values returned by this routine may vary
2001	/// for different patterns and NFA configurations.
2002	///
2003	/// ```
2004	/// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID};
2005	///
2006	/// let nfa = NFA::new(r"(a)(b)(c)")?;
2007	/// // There are 3 explicit groups in the pattern's concrete syntax and
2008	/// // 1 unnamed and implicit group spanning the entire pattern.
2009	/// assert_eq!(`4`, nfa.group_info().all_group_len());
2010	///
2011	/// let nfa = NFA::new(r"abc")?;
2012	/// // There is just the unnamed implicit group.
2013	/// assert_eq!(`1`, nfa.group_info().all_group_len());
2014	///
2015	/// let nfa = NFA::new_many(&["(a)", "b", "(c)"])?;
2016	/// // Each pattern has one implicit groups, and two
2017	/// // patterns have one explicit group each.
2018	/// assert_eq!(`5`, nfa.group_info().all_group_len());
2019	///
2020	/// let nfa = NFA::compiler()
2021	/// .configure(NFA::config().which_captures(WhichCaptures::None))
2022	/// .build(r"abc")?;
2023	/// // We disabled capturing groups, so there are none.
2024	/// assert_eq!(`0`, nfa.group_info().all_group_len());
2025	///
2026	/// let nfa = NFA::compiler()
2027	/// .configure(NFA::config().which_captures(WhichCaptures::None))
2028	/// .build(r"(a)(b)(c)")?;
2029	/// // We disabled capturing groups, so there are none, even if there are
2030	/// // explicit groups in the concrete syntax.
2031	/// assert_eq!(`0`, nfa.group_info().group_len(PatternID::ZERO));
2032	///
2033	/// # Ok::<(), Box<dyn std::error::Error>>(())
2034	/// ```
2035	#[inline]
2036	pub fn all_group_len(&self) -> usize {
2037	self.slot_len() / `2`
2038	}
2039
2040	/// Returns the total number of slots in this `GroupInfo` across all
2041	/// patterns.
2042	///
2043	/// The total number of slots is always twice the total number of capturing
2044	/// groups, including both implicit and explicit groups.
2045	///
2046	/// # Example
2047	///
2048	/// This example shows the relationship between the number of capturing
2049	/// groups and slots.
2050	///
2051	/// ```
2052	/// use regex_automata::util::captures::GroupInfo;
2053	///
2054	/// // There are 11 total groups here.
2055	/// let info = GroupInfo::new(vec![
2056	/// vec![None, Some("foo")],
2057	/// vec![None],
2058	/// vec![None, None, None, Some("bar"), None],
2059	/// vec![None, None, Some("foo")],
2060	/// ])?;
2061	/// // 2 slots per group gives us 112=22 slots.*
2062	/// assert_eq!(`22`, info.slot_len());
2063	///
2064	/// # Ok::<(), Box<dyn std::error::Error>>(())
2065	/// ```
2066	#[inline]
2067	pub fn slot_len(&self) -> usize {
2068	self.0.small_slot_len().as_usize()
2069	}
2070
2071	/// Returns the total number of slots for implicit capturing groups.
2072	///
2073	/// This is like [`GroupInfo::slot_len`], except it doesn't include the
2074	/// explicit slots for each pattern. Since there are always exactly 2
2075	/// implicit slots for each pattern, the number of implicit slots is always
2076	/// equal to twice the number of patterns.
2077	///
2078	/// # Example
2079	///
2080	/// This example shows the relationship between the number of capturing
2081	/// groups, implicit slots and explicit slots.
2082	///
2083	/// ```
2084	/// use regex_automata::util::captures::GroupInfo;
2085	///
2086	/// // There are 11 total groups here.
2087	/// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
2088	/// // 2 slots per group gives us 112=22 slots.*
2089	/// assert_eq!(`6`, info.slot_len());
2090	/// // 2 implicit slots per pattern gives us 2 implicit slots since there
2091	/// // is 1 pattern.
2092	/// assert_eq!(`2`, info.implicit_slot_len());
2093	/// // 2 explicit capturing groups gives us 22=4 explicit slots.*
2094	/// assert_eq!(`4`, info.explicit_slot_len());
2095	///
2096	/// # Ok::<(), Box<dyn std::error::Error>>(())
2097	/// ```
2098	#[inline]
2099	pub fn implicit_slot_len(&self) -> usize {
2100	self.pattern_len() * `2`
2101	}
2102
2103	/// Returns the total number of slots for explicit capturing groups.
2104	///
2105	/// This is like [`GroupInfo::slot_len`], except it doesn't include the
2106	/// implicit slots for each pattern. (There are always 2 implicit slots for
2107	/// each pattern.)
2108	///
2109	/// For a non-empty `GroupInfo`, it is always the case that `slot_len` is
2110	/// strictly greater than `explicit_slot_len`. For an empty `GroupInfo`,
2111	/// both the total number of slots and the number of explicit slots is
2112	/// `0`.
2113	///
2114	/// # Example
2115	///
2116	/// This example shows the relationship between the number of capturing
2117	/// groups, implicit slots and explicit slots.
2118	///
2119	/// ```
2120	/// use regex_automata::util::captures::GroupInfo;
2121	///
2122	/// // There are 11 total groups here.
2123	/// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
2124	/// // 2 slots per group gives us 112=22 slots.*
2125	/// assert_eq!(`6`, info.slot_len());
2126	/// // 2 implicit slots per pattern gives us 2 implicit slots since there
2127	/// // is 1 pattern.
2128	/// assert_eq!(`2`, info.implicit_slot_len());
2129	/// // 2 explicit capturing groups gives us 22=4 explicit slots.*
2130	/// assert_eq!(`4`, info.explicit_slot_len());
2131	///
2132	/// # Ok::<(), Box<dyn std::error::Error>>(())
2133	/// ```
2134	#[inline]
2135	pub fn explicit_slot_len(&self) -> usize {
2136	self.slot_len().saturating_sub(self.implicit_slot_len())
2137	}
2138
2139	/// Returns the memory usage, in bytes, of this `GroupInfo`.
2140	///
2141	/// This does not* include the stack size used up by this `GroupInfo`.*
2142	/// To compute that, use `std::mem::size_of::<GroupInfo>()`.
2143	#[inline]
2144	pub fn memory_usage(&self) -> usize {
2145	use core::mem::size_of as s;
2146
2147	s::<GroupInfoInner>()
2148	+ self.0.slot_ranges.len() * s::<(SmallIndex, SmallIndex)>()
2149	+ self.0.name_to_index.len() * s::<CaptureNameMap>()
2150	+ self.0.index_to_name.len() * s::<Vec<Option<Arc<str>>>>()
2151	+ self.0.memory_extra
2152	}
2153	}
2154
2155	/// A map from capture group name to its corresponding capture group index.
2156	///
2157	/// This type is actually wrapped inside a Vec indexed by pattern ID on a
2158	/// `GroupInfo`, since multiple patterns may have the same capture group name.
2159	/// That is, each pattern gets its own namespace of capture group names.
2160	///
2161	/// Perhaps a more memory efficient representation would be
2162	/// HashMap<(PatternID, Arc<str>), usize>, but this makes it difficult to look
2163	/// up a capture index by name without producing a `Arc<str>`, which requires
2164	/// an allocation. To fix this, I think we'd need to define our own unsized
2165	/// type or something? Anyway, I didn't give this much thought since it
2166	/// probably doesn't matter much in the grand scheme of things. But it did
2167	/// stand out to me as mildly wasteful.
2168	#[cfg(feature = "std")]
2169	type CaptureNameMap = std::collections::HashMap<Arc<str>, SmallIndex>;
2170	#[cfg(not(feature = "std"))]
2171	type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, SmallIndex>;
2172
2173	/// The inner guts of `GroupInfo`. This type only exists so that it can
2174	/// be wrapped in an `Arc` to make `GroupInfo` reference counted.
2175	#[derive(Debug, Default)]
2176	struct GroupInfoInner {
2177	slot_ranges: Vec<(SmallIndex, SmallIndex)>,
2178	name_to_index: Vec<CaptureNameMap>,
2179	index_to_name: Vec<Vec<Option<Arc<str>>>>,
2180	memory_extra: usize,
2181	}
2182
2183	impl GroupInfoInner {
2184	/// This adds the first unnamed group for the given pattern ID. The given
2185	/// pattern ID must be zero if this is the first time this method is
2186	/// called, or must be exactly one more than the pattern ID supplied to the
2187	/// previous call to this method. (This method panics if this rule is
2188	/// violated.)
2189	///
2190	/// This can be thought of as initializing the GroupInfo state for the
2191	/// given pattern and closing off the state for any previous pattern.
2192	fn add_first_group(&mut self, pid: PatternID) {
2193	assert_eq!(pid.as_usize(), self.slot_ranges.len());
2194	assert_eq!(pid.as_usize(), self.name_to_index.len());
2195	assert_eq!(pid.as_usize(), self.index_to_name.len());
2196	// This is the start of our slots for the explicit capturing groups.
2197	// Note that since the slots for the 0th group for every pattern appear
2198	// before any slots for the nth group (where n > 0) in any pattern, we
2199	// will have to fix up the slot ranges once we know how many patterns
2200	// we've added capture groups for.
2201	let slot_start = self.small_slot_len();
2202	self.slot_ranges.push((slot_start, slot_start));
2203	self.name_to_index.push(CaptureNameMap::new());
2204	self.index_to_name.push(vec![None]);
2205	self.memory_extra += core::mem::size_of::<Option<Arc<str>>>();
2206	}
2207
2208	/// Add an explicit capturing group for the given pattern with the given
2209	/// index. If the group has a name, then that must be given as well.
2210	///
2211	/// Note that every capturing group except for the first or zeroth group is
2212	/// explicit.
2213	///
2214	/// This returns an error if adding this group would result in overflowing
2215	/// slot indices or if a capturing group with the same name for this
2216	/// pattern has already been added.
2217	fn add_explicit_group<N: AsRef<str>>(
2218	&mut self,
2219	pid: PatternID,
2220	group: SmallIndex,
2221	maybe_name: Option<N>,
2222	) -> Result<(), GroupInfoError> {
2223	// We also need to check that the slot index generated for
2224	// this group is also valid. Although, this is a little weird
2225	// because we offset these indices below, at which point, we'll
2226	// have to recheck them. Gosh this is annoying. Note that
2227	// the '+2' below is OK because 'end' is guaranteed to be less
2228	// than isize::MAX.
2229	let end = &mut self.slot_ranges[pid].1;
2230	*end = SmallIndex::new(end.as_usize() + `2`).map_err(\|_\| {
2231	GroupInfoError::too_many_groups(pid, group.as_usize())
2232	})?;
2233	if let Some(name) = maybe_name {
2234	let name = Arc::<str>::from(name.as_ref());
2235	if self.name_to_index[pid].contains_key(&*name) {
2236	return Err(GroupInfoError::duplicate(pid, &name));
2237	}
2238	let len = name.len();
2239	self.name_to_index[pid].insert(Arc::clone(&name), group);
2240	self.index_to_name[pid].push(Some(name));
2241	// Adds the memory used by the Arc<str> in both maps.
2242	self.memory_extra +=
2243	`2` * (len + core::mem::size_of::<Option<Arc<str>>>());
2244	// And also the value entry for the 'name_to_index' map.
2245	// This is probably an underestimate for 'name_to_index' since
2246	// hashmaps/btrees likely have some non-zero overhead, but we
2247	// assume here that they have zero overhead.
2248	self.memory_extra += core::mem::size_of::<SmallIndex>();
2249	} else {
2250	self.index_to_name[pid].push(None);
2251	self.memory_extra += core::mem::size_of::<Option<Arc<str>>>();
2252	}
2253	// This is a sanity assert that checks that our group index
2254	// is in line with the number of groups added so far for this
2255	// pattern.
2256	assert_eq!(group.one_more(), self.group_len(pid));
2257	// And is also in line with the 'index_to_name' map.
2258	assert_eq!(group.one_more(), self.index_to_name[pid].len());
2259	Ok(())
2260	}
2261
2262	/// This corrects the slot ranges to account for the slots corresponding
2263	/// to the zeroth group of each pattern. That is, every slot range is
2264	/// offset by 'pattern_len() 2', since each pattern uses two slots to*
2265	/// represent the zeroth group.
2266	fn fixup_slot_ranges(&mut self) -> Result<(), GroupInfoError> {
2267	use crate::util::primitives::IteratorIndexExt;
2268	// Since we know number of patterns fits in PatternID and
2269	// PatternID::MAX < isize::MAX, it follows that multiplying by 2 will
2270	// never overflow usize.
2271	let offset = self.pattern_len().checked_mul(`2`).unwrap();
2272	for (pid, &mut (ref mut start, ref mut end)) in
2273	self.slot_ranges.iter_mut().with_pattern_ids()
2274	{
2275	let group_len = `1` + ((end.as_usize() - start.as_usize()) / `2`);
2276	let new_end = match end.as_usize().checked_add(offset) {
2277	Some(new_end) => new_end,
2278	None => {
2279	return Err(GroupInfoError::too_many_groups(
2280	pid, group_len,
2281	))
2282	}
2283	};
2284	*end = SmallIndex::new(new_end).map_err(\|_\| {
2285	GroupInfoError::too_many_groups(pid, group_len)
2286	})?;
2287	// Since start <= end, if end is valid then start must be too.
2288	*start = SmallIndex::new(start.as_usize() + offset).unwrap();
2289	}
2290	Ok(())
2291	}
2292
2293	/// Return the total number of patterns represented by this capture slot
2294	/// info.
2295	fn pattern_len(&self) -> usize {
2296	self.slot_ranges.len()
2297	}
2298
2299	/// Return the total number of capturing groups for the given pattern. If
2300	/// the given pattern isn't valid for this capture slot info, then 0 is
2301	/// returned.
2302	fn group_len(&self, pid: PatternID) -> usize {
2303	let (start, end) = match self.slot_ranges.get(pid.as_usize()) {
2304	None => return `0`,
2305	Some(range) => range,
2306	};
2307	// The difference between any two SmallIndex values always fits in a
2308	// usize since we know that SmallIndex::MAX <= isize::MAX-1. We also
2309	// know that start<=end by construction and that the number of groups
2310	// never exceeds SmallIndex and thus never overflows usize.
2311	`1` + ((end.as_usize() - start.as_usize()) / `2`)
2312	}
2313
2314	/// Return the total number of slots in this capture slot info as a
2315	/// "small index."
2316	fn small_slot_len(&self) -> SmallIndex {
2317	// Since slots are allocated in order of pattern (starting at 0) and
2318	// then in order of capture group, it follows that the number of slots
2319	// is the end of the range of slots for the last pattern. This is
2320	// true even when the last pattern has no capturing groups, since
2321	// 'slot_ranges' will still represent it explicitly with an empty
2322	// range.
2323	self.slot_ranges.last().map_or(SmallIndex::ZERO, \|&(_, end)\| end)
2324	}
2325	}
2326
2327	/// An error that may occur when building a `GroupInfo`.
2328	///
2329	/// Building a `GroupInfo` does a variety of checks to make sure the
2330	/// capturing groups satisfy a number of invariants. This includes, but is not
2331	/// limited to, ensuring that the first capturing group is unnamed and that
2332	/// there are no duplicate capture groups for a specific pattern.
2333	#[derive(Clone, Debug)]
2334	pub struct GroupInfoError {
2335	kind: GroupInfoErrorKind,
2336	}
2337
2338	/// The kind of error that occurs when building a `GroupInfo` fails.
2339	///
2340	/// We keep this un-exported because it's not clear how useful it is to
2341	/// export it.
2342	#[derive(Clone, Debug)]
2343	enum GroupInfoErrorKind {
2344	/// This occurs when too many patterns have been added. i.e., It would
2345	/// otherwise overflow a `PatternID`.
2346	TooManyPatterns { err: PatternIDError },
2347	/// This occurs when too many capturing groups have been added for a
2348	/// particular pattern.
2349	TooManyGroups {
2350	/// The ID of the pattern that had too many groups.
2351	pattern: PatternID,
2352	/// The minimum number of groups that the caller has tried to add for
2353	/// a pattern.
2354	minimum: usize,
2355	},
2356	/// An error that occurs when a pattern has no capture groups. Either the
2357	/// group info must be empty, or all patterns must have at least one group
2358	/// (corresponding to the unnamed group for the entire pattern).
2359	MissingGroups {
2360	/// The ID of the pattern that had no capturing groups.
2361	pattern: PatternID,
2362	},
2363	/// An error that occurs when one tries to provide a name for the capture
2364	/// group at index 0. This capturing group must currently always be
2365	/// unnamed.
2366	FirstMustBeUnnamed {
2367	/// The ID of the pattern that was found to have a named first
2368	/// capturing group.
2369	pattern: PatternID,
2370	},
2371	/// An error that occurs when duplicate capture group names for the same
2372	/// pattern are added.
2373	///
2374	/// NOTE: At time of writing, this error can never occur if you're using
2375	/// regex-syntax, since the parser itself will reject patterns with
2376	/// duplicate capture group names. This error can only occur when the
2377	/// builder is used to hand construct NFAs.
2378	Duplicate {
2379	/// The pattern in which the duplicate capture group name was found.
2380	pattern: PatternID,
2381	/// The duplicate name.
2382	name: String,
2383	},
2384	}
2385
2386	impl GroupInfoError {
2387	fn too_many_patterns(err: PatternIDError) -> GroupInfoError {
2388	GroupInfoError { kind: GroupInfoErrorKind::TooManyPatterns { err } }
2389	}
2390
2391	fn too_many_groups(pattern: PatternID, minimum: usize) -> GroupInfoError {
2392	GroupInfoError {
2393	kind: GroupInfoErrorKind::TooManyGroups { pattern, minimum },
2394	}
2395	}
2396
2397	fn missing_groups(pattern: PatternID) -> GroupInfoError {
2398	GroupInfoError { kind: GroupInfoErrorKind::MissingGroups { pattern } }
2399	}
2400
2401	fn first_must_be_unnamed(pattern: PatternID) -> GroupInfoError {
2402	GroupInfoError {
2403	kind: GroupInfoErrorKind::FirstMustBeUnnamed { pattern },
2404	}
2405	}
2406
2407	fn duplicate(pattern: PatternID, name: &str) -> GroupInfoError {
2408	GroupInfoError {
2409	kind: GroupInfoErrorKind::Duplicate {
2410	pattern,
2411	name: String::from(name),
2412	},
2413	}
2414	}
2415	}
2416
2417	#[cfg(feature = "std")]
2418	impl std::error::Error for GroupInfoError {
2419	fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
2420	match self.kind {
2421	GroupInfoErrorKind::TooManyPatterns { .. }
2422	\| GroupInfoErrorKind::TooManyGroups { .. }
2423	\| GroupInfoErrorKind::MissingGroups { .. }
2424	\| GroupInfoErrorKind::FirstMustBeUnnamed { .. }
2425	\| GroupInfoErrorKind::Duplicate { .. } => None,
2426	}
2427	}
2428	}
2429
2430	impl core::fmt::Display for GroupInfoError {
2431	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2432	use self::GroupInfoErrorKind::*;
2433
2434	match self.kind {
2435	TooManyPatterns { ref err } => {
2436	write!(f, "too many patterns to build capture info: {}", err)
2437	}
2438	TooManyGroups { pattern, minimum } => {
2439	write!(
2440	f,
2441	"too many capture groups (at least {}) were \
2442	found for pattern {}",
2443	minimum,
2444	pattern.as_usize()
2445	)
2446	}
2447	MissingGroups { pattern } => write!(
2448	f,
2449	"no capturing groups found for pattern {} \
2450	(either all patterns have zero groups or all patterns have \
2451	at least one group)",
2452	pattern.as_usize(),
2453	),
2454	FirstMustBeUnnamed { pattern } => write!(
2455	f,
2456	"first capture group (at index 0) for pattern {} has a name \
2457	(it must be unnamed)",
2458	pattern.as_usize(),
2459	),
2460	Duplicate { pattern, ref name } => write!(
2461	f,
2462	"duplicate capture group name '{}' found for pattern {}",
2463	name,
2464	pattern.as_usize(),
2465	),
2466	}
2467	}
2468	}
2469
2470	/// An iterator over capturing groups and their names for a specific pattern.
2471	///
2472	/// This iterator is created by [`GroupInfo::pattern_names`].
2473	///
2474	/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo`
2475	/// from which this iterator was created.
2476	#[derive(Clone, Debug)]
2477	pub struct GroupInfoPatternNames<'a> {
2478	it: core::slice::Iter<'a, Option<Arc<str>>>,
2479	}
2480
2481	impl GroupInfoPatternNames<'static> {
2482	fn empty() -> GroupInfoPatternNames<'static> {
2483	GroupInfoPatternNames { it: [].iter() }
2484	}
2485	}
2486
2487	impl<'a> Iterator for GroupInfoPatternNames<'a> {
2488	type Item = Option<&'a str>;
2489
2490	fn next(&mut self) -> Option<Option<&'a str>> {
2491	self.it.next().map(\|x: &Option>\| x.as_deref())
2492	}
2493
2494	fn size_hint(&self) -> (usize, Option<usize>) {
2495	self.it.size_hint()
2496	}
2497
2498	fn count(self) -> usize {
2499	self.it.count()
2500	}
2501	}
2502
2503	impl<'a> ExactSizeIterator for GroupInfoPatternNames<'a> {}
2504	impl<'a> core::iter::FusedIterator for GroupInfoPatternNames<'a> {}
2505
2506	/// An iterator over capturing groups and their names for a `GroupInfo`.
2507	///
2508	/// This iterator is created by [`GroupInfo::all_names`].
2509	///
2510	/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo`
2511	/// from which this iterator was created.
2512	#[derive(Debug)]
2513	pub struct GroupInfoAllNames<'a> {
2514	group_info: &'a GroupInfo,
2515	pids: PatternIDIter,
2516	current_pid: Option<PatternID>,
2517	names: Option<core::iter::Enumerate<GroupInfoPatternNames<'a>>>,
2518	}
2519
2520	impl<'a> Iterator for GroupInfoAllNames<'a> {
2521	type Item = (PatternID, usize, Option<&'a str>);
2522
2523	fn next(&mut self) -> Option<(PatternID, usize, Option<&'a str>)> {
2524	// If the group info has no captures, then we never have anything
2525	// to yield. We need to consider this case explicitly (at time of
2526	// writing) because 'pattern_capture_names' will panic if captures
2527	// aren't enabled.
2528	if self.group_info.0.index_to_name.is_empty() {
2529	return None;
2530	}
2531	if self.current_pid.is_none() {
2532	self.current_pid = Some(self.pids.next()?);
2533	}
2534	let pid = self.current_pid.unwrap();
2535	if self.names.is_none() {
2536	self.names = Some(self.group_info.pattern_names(pid).enumerate());
2537	}
2538	let (group_index, name) = match self.names.as_mut().unwrap().next() {
2539	Some((group_index, name)) => (group_index, name),
2540	None => {
2541	self.current_pid = None;
2542	self.names = None;
2543	return self.next();
2544	}
2545	};
2546	Some((pid, group_index, name))
2547	}
2548	}
2549

Provided by KDAB

Definitions