string.rs source code [crates/regex_lite/src/string.rs]

1	use alloc::{
2	borrow::Cow, boxed::Box, string::String, string::ToString, sync::Arc, vec,
3	vec::Vec,
4	};
5
6	use crate::{
7	error::Error,
8	hir::{self, Hir},
9	int::NonMaxUsize,
10	interpolate,
11	nfa::{self, NFA},
12	pikevm::{self, Cache, PikeVM},
13	pool::CachePool,
14	};
15
16	/// A compiled regular expression for searching Unicode haystacks.
17	///
18	/// A `Regex` can be used to search haystacks, split haystacks into substrings
19	/// or replace substrings in a haystack with a different substring. All
/// searching is done with an implicit `(?s:.)*?` at the beginning and end of
/// a pattern. To force an expression to match the whole string (or a prefix
22	/// or a suffix), you must use an anchor like `^` or `$` (or `\A` and `\z`).
23	///
24	/// While this crate will handle Unicode strings (whether in the regular
25	/// expression or in the haystack), all positions returned are byte
26	/// offsets. Every byte offset is guaranteed to be at a Unicode code point
27	/// boundary. That is, all offsets returned by the `Regex` API are guaranteed
28	/// to be ranges that can slice a `&str` without panicking.
29	///
30	/// The only methods that allocate new strings are the string replacement
31	/// methods. All other methods (searching and splitting) return borrowed
32	/// references into the haystack given.
33	///
34	/// # Example
35	///
36	/// Find the offsets of a US phone number:
37	///
38	/// ```
39	/// use regex_lite::Regex;
40	///
41	/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
42	/// let m = re.find("phone: 111-222-3333").unwrap();
/// assert_eq!(7..19, m.range());
44	/// ```
45	///
46	/// # Example: extracting capture groups
47	///
48	/// A common way to use regexes is with capture groups. That is, instead of
49	/// just looking for matches of an entire regex, parentheses are used to create
50	/// groups that represent part of the match.
51	///
52	/// For example, consider a haystack with multiple lines, and each line has
53	/// three whitespace delimited fields where the second field is expected to be
54	/// a number and the third field a boolean. To make this convenient, we use
55	/// the [`Captures::extract`] API to put the strings that match each group
56	/// into a fixed size array:
57	///
58	/// ```
59	/// use regex_lite::Regex;
60	///
61	/// let hay = "
62	/// rabbit 54 true
63	/// groundhog 2 true
64	/// does not match
65	/// fox 109 false
66	/// ";
/// let re = Regex::new(r"(?m)^\s*(\S+)\s+([0-9]+)\s+(true|false)\s*$").unwrap();
/// let mut fields: Vec<(&str, i64, bool)> = vec![];
/// for (_, [f1, f2, f3]) in re.captures_iter(hay).map(|caps| caps.extract()) {
///     fields.push((f1, f2.parse()?, f3.parse()?));
/// }
/// assert_eq!(fields, vec![
///     ("rabbit", 54, true),
///     ("groundhog", 2, true),
///     ("fox", 109, false),
/// ]);
77	///
78	/// # Ok::<(), Box<dyn std::error::Error>>(())
79	/// ```
pub struct Regex {
    // The compiled matcher. It lives behind an `Arc`, so cloning a `Regex`
    // clones the handle rather than the matcher itself.
    pikevm: Arc<PikeVM>,
    // Pool whose factory builds `Cache` values for `pikevm`; the search
    // iterators obtain one through `pool.get()`.
    pool: CachePool,
}
84
85	impl Clone for Regex {
86	fn clone(&self) -> Regex {
87	let pikevm: Arc = Arc::clone(&self.pikevm);
88	let pool: Pool … + Send + Sync + UnwindSafe + RefUnwindSafe + 'static>> = {
89	let pikevm: Arc = Arc::clone(&self.pikevm);
90	let create: Box Cache> = Box::new(move \|\| Cache::new(&pikevm));
91	CachePool::new(create)
92	};
93	Regex { pikevm, pool }
94	}
95	}
96
97	impl core::fmt::Display for Regex {
98	/// Shows the original regular expression.
99	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
100	write!(f, "{}", self.as_str())
101	}
102	}
103
104	impl core::fmt::Debug for Regex {
105	/// Shows the original regular expression.
106	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
107	f.debug_tuple(name:"Regex").field(&self.as_str()).finish()
108	}
109	}
110
111	impl core::str::FromStr for Regex {
112	type Err = Error;
113
114	/// Attempts to parse a string into a regular expression
115	fn from_str(s: &str) -> Result<Regex, Error> {
116	Regex::new(pattern:s)
117	}
118	}
119
120	impl TryFrom<&str> for Regex {
121	type Error = Error;
122
123	/// Attempts to parse a string into a regular expression
124	fn try_from(s: &str) -> Result<Regex, Error> {
125	Regex::new(pattern:s)
126	}
127	}
128
129	impl TryFrom<String> for Regex {
130	type Error = Error;
131
132	/// Attempts to parse a string into a regular expression
133	fn try_from(s: String) -> Result<Regex, Error> {
134	Regex::new(&s)
135	}
136	}
137
138	/// Core regular expression methods.
139	impl Regex {
140	/// Compiles a regular expression. Once compiled, it can be used repeatedly
141	/// to search, split or replace substrings in a haystack.
142	///
143	/// Note that regex compilation tends to be a somewhat expensive process,
144	/// and unlike higher level environments, compilation is not automatically
145	/// cached for you. One should endeavor to compile a regex once and then
146	/// reuse it. For example, it's a bad idea to compile the same regex
147	/// repeatedly in a loop.
148	///
149	/// # Errors
150	///
151	/// If an invalid pattern is given, then an error is returned.
152	/// An error is also returned if the pattern is valid, but would
153	/// produce a regex that is bigger than the configured size limit via
154	/// [`RegexBuilder::size_limit`]. (A reasonable size limit is enabled by
155	/// default.)
156	///
157	/// # Example
158	///
159	/// ```
160	/// use regex_lite::Regex;
161	///
/// // An invalid pattern because of an unclosed parenthesis
163	/// assert!(Regex::new(r"foo(bar").is_err());
164	/// // An invalid pattern because the regex would be too big
165	/// // because Unicode tends to inflate things.
166	/// assert!(Regex::new(r"\w{1000000}").is_err());
167	/// ```
168	pub fn new(pattern: &str) -> Result<Regex, Error> {
169	RegexBuilder::new(pattern).build()
170	}
171
172	/// Returns true if and only if there is a match for the regex anywhere
173	/// in the haystack given.
174	///
175	/// It is recommended to use this method if all you need to do is test
176	/// whether a match exists, since the underlying matching engine may be
177	/// able to do less work.
178	///
179	/// # Example
180	///
181	/// Test if some haystack contains at least one word with exactly 13
182	/// word characters:
183	///
184	/// ```
185	/// use regex_lite::Regex;
186	///
187	/// let re = Regex::new(r"\b\w{13}\b").unwrap();
188	/// let hay = "I categorically deny having triskaidekaphobia.";
189	/// assert!(re.is_match(hay));
190	/// ```
191	#[inline]
192	pub fn is_match(&self, haystack: &str) -> bool {
193	self.is_match_at(haystack, `0`)
194	}
195
196	/// This routine searches for the first match of this regex in the
197	/// haystack given, and if found, returns a [`Match`]. The `Match`
198	/// provides access to both the byte offsets of the match and the actual
199	/// substring that matched.
200	///
201	/// Note that this should only be used if you want to find the entire
202	/// match. If instead you just want to test the existence of a match,
203	/// it's potentially faster to use `Regex::is_match(hay)` instead of
204	/// `Regex::find(hay).is_some()`.
205	///
206	/// # Example
207	///
208	/// Find the first word with exactly 13 word characters:
209	///
210	/// ```
211	/// use regex_lite::Regex;
212	///
213	/// let re = Regex::new(r"\b\w{13}\b").unwrap();
214	/// let hay = "I categorically deny having triskaidekaphobia.";
215	/// let mat = re.find(hay).unwrap();
/// assert_eq!(2..15, mat.range());
217	/// assert_eq!("categorically", mat.as_str());
218	/// ```
219	#[inline]
220	pub fn find<'h>(&self, haystack: &'h str) -> Option<Match<'h>> {
221	self.find_at(haystack, `0`)
222	}
223
224	/// Returns an iterator that yields successive non-overlapping matches in
225	/// the given haystack. The iterator yields values of type [`Match`].
226	///
227	/// # Time complexity
228	///
229	/// Note that since `find_iter` runs potentially many searches on the
/// haystack and since each search has worst case `O(m * n)` time
/// complexity, the overall worst case time complexity for iteration is
/// `O(m * n^2)`.
233	///
234	/// # Example
235	///
236	/// Find every word with exactly 13 word characters:
237	///
238	/// ```
239	/// use regex_lite::Regex;
240	///
241	/// let re = Regex::new(r"\b\w{13}\b").unwrap();
242	/// let hay = "Retroactively relinquishing remunerations is reprehensible.";
/// let matches: Vec<_> = re.find_iter(hay).map(|m| m.as_str()).collect();
244	/// assert_eq!(matches, vec![
245	/// "Retroactively",
246	/// "relinquishing",
247	/// "remunerations",
248	/// "reprehensible",
249	/// ]);
250	/// ```
251	#[inline]
252	pub fn find_iter<'r, 'h>(&'r self, haystack: &'h str) -> Matches<'r, 'h> {
253	Matches {
254	haystack,
255	it: self.pikevm.find_iter(self.pool.get(), haystack.as_bytes()),
256	}
257	}
258
259	/// This routine searches for the first match of this regex in the haystack
260	/// given, and if found, returns not only the overall match but also the
261	/// matches of each capture group in the regex. If no match is found, then
262	/// `None` is returned.
263	///
264	/// Capture group `0` always corresponds to an implicit unnamed group that
265	/// includes the entire match. If a match is found, this group is always
266	/// present. Subsequent groups may be named and are numbered, starting
267	/// at 1, by the order in which the opening parenthesis appears in the
268	/// pattern. For example, in the pattern `(?<a>.(?<b>.))(?<c>.)`, `a`,
269	/// `b` and `c` correspond to capture group indices `1`, `2` and `3`,
270	/// respectively.
271	///
272	/// You should only use `captures` if you need access to the capture group
273	/// matches. Otherwise, [`Regex::find`] is generally faster for discovering
274	/// just the overall match.
275	///
276	/// # Example
277	///
278	/// Say you have some haystack with movie names and their release years,
279	/// like "'Citizen Kane' (1941)". It'd be nice if we could search for
280	/// substrings looking like that, while also extracting the movie name and
281	/// its release year separately. The example below shows how to do that.
282	///
283	/// ```
284	/// use regex_lite::Regex;
285	///
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
/// let hay = "Not my favorite movie: 'Citizen Kane' (1941).";
/// let caps = re.captures(hay).unwrap();
/// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
/// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane");
/// assert_eq!(caps.get(2).unwrap().as_str(), "1941");
/// // You can also access the groups by index using the Index notation.
/// // Note that this will panic on an invalid index. In this case, these
/// // accesses are always correct because the overall regex will only
/// // match when these capture groups match.
/// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
/// assert_eq!(&caps[1], "Citizen Kane");
/// assert_eq!(&caps[2], "1941");
299	/// ```
300	///
301	/// Note that the full match is at capture group `0`. Each subsequent
302	/// capture group is indexed by the order of its opening `(`.
303	///
/// We can make this example a bit clearer by using *named* capture groups:
305	///
306	/// ```
307	/// use regex_lite::Regex;
308	///
/// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>\d{4})\)").unwrap();
/// let hay = "Not my favorite movie: 'Citizen Kane' (1941).";
/// let caps = re.captures(hay).unwrap();
/// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
/// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane");
/// assert_eq!(caps.name("year").unwrap().as_str(), "1941");
/// // You can also access the groups by name using the Index notation.
/// // Note that this will panic on an invalid group name. In this case,
/// // these accesses are always correct because the overall regex will
/// // only match when these capture groups match.
/// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
320	/// assert_eq!(&caps["title"], "Citizen Kane");
321	/// assert_eq!(&caps["year"], "1941");
322	/// ```
323	///
324	/// Here we name the capture groups, which we can access with the `name`
325	/// method or the `Index` notation with a `&str`. Note that the named
326	/// capture groups are still accessible with `get` or the `Index` notation
327	/// with a `usize`.
328	///
329	/// The `0`th capture group is always unnamed, so it must always be
330	/// accessed with `get(0)` or `[0]`.
331	///
/// Finally, one other way to get the matched substrings is with the
333	/// [`Captures::extract`] API:
334	///
335	/// ```
336	/// use regex_lite::Regex;
337	///
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
339	/// let hay = "Not my favorite movie: 'Citizen Kane' (1941).";
340	/// let (full, [title, year]) = re.captures(hay).unwrap().extract();
341	/// assert_eq!(full, "'Citizen Kane' (1941)");
342	/// assert_eq!(title, "Citizen Kane");
343	/// assert_eq!(year, "1941");
344	/// ```
345	#[inline]
346	pub fn captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>> {
347	self.captures_at(haystack, `0`)
348	}
349
350	/// Returns an iterator that yields successive non-overlapping matches in
351	/// the given haystack. The iterator yields values of type [`Captures`].
352	///
353	/// This is the same as [`Regex::find_iter`], but instead of only providing
354	/// access to the overall match, each value yield includes access to the
355	/// matches of all capture groups in the regex. Reporting this extra match
356	/// data is potentially costly, so callers should only use `captures_iter`
357	/// over `find_iter` when they actually need access to the capture group
358	/// matches.
359	///
360	/// # Time complexity
361	///
362	/// Note that since `captures_iter` runs potentially many searches on the
/// haystack and since each search has worst case `O(m * n)` time
/// complexity, the overall worst case time complexity for iteration is
/// `O(m * n^2)`.
366	///
367	/// # Example
368	///
369	/// We can use this to find all movie titles and their release years in
370	/// some haystack, where the movie is formatted like "'Title' (xxxx)":
371	///
372	/// ```
373	/// use regex_lite::Regex;
374	///
/// let re = Regex::new(r"'([^']+)'\s+\(([0-9]{4})\)").unwrap();
/// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
/// let mut movies = vec![];
/// for (_, [title, year]) in re.captures_iter(hay).map(|c| c.extract()) {
///     movies.push((title, year.parse::<i64>()?));
/// }
/// assert_eq!(movies, vec![
///     ("Citizen Kane", 1941),
///     ("The Wizard of Oz", 1939),
///     ("M", 1931),
/// ]);
386	/// # Ok::<(), Box<dyn std::error::Error>>(())
387	/// ```
388	///
389	/// Or with named groups:
390	///
391	/// ```
392	/// use regex_lite::Regex;
393	///
/// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>[0-9]{4})\)").unwrap();
395	/// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
396	/// let mut it = re.captures_iter(hay);
397	///
398	/// let caps = it.next().unwrap();
399	/// assert_eq!(&caps["title"], "Citizen Kane");
400	/// assert_eq!(&caps["year"], "1941");
401	///
402	/// let caps = it.next().unwrap();
403	/// assert_eq!(&caps["title"], "The Wizard of Oz");
404	/// assert_eq!(&caps["year"], "1939");
405	///
406	/// let caps = it.next().unwrap();
407	/// assert_eq!(&caps["title"], "M");
408	/// assert_eq!(&caps["year"], "1931");
409	/// ```
410	#[inline]
411	pub fn captures_iter<'r, 'h>(
412	&'r self,
413	haystack: &'h str,
414	) -> CaptureMatches<'r, 'h> {
415	CaptureMatches {
416	haystack,
417	re: self,
418	it: self
419	.pikevm
420	.captures_iter(self.pool.get(), haystack.as_bytes()),
421	}
422	}
423
424	/// Returns an iterator of substrings of the haystack given, delimited by a
425	/// match of the regex. Namely, each element of the iterator corresponds to
/// a part of the haystack that *isn't* matched by the regular expression.
427	///
428	/// # Time complexity
429	///
430	/// Since iterators over all matches requires running potentially many
431	/// searches on the haystack, and since each search has worst case
/// `O(m * n)` time complexity, the overall worst case time complexity for
/// this routine is `O(m * n^2)`.
434	///
435	/// # Example
436	///
437	/// To split a string delimited by arbitrary amounts of spaces or tabs:
438	///
439	/// ```
440	/// use regex_lite::Regex;
441	///
442	/// let re = Regex::new(r"[ \t]+").unwrap();
/// let hay = "a b \t  c\td    e";
444	/// let fields: Vec<&str> = re.split(hay).collect();
445	/// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
446	/// ```
447	///
448	/// # Example: more cases
449	///
450	/// Basic usage:
451	///
452	/// ```
453	/// use regex_lite::Regex;
454	///
455	/// let re = Regex::new(r" ").unwrap();
456	/// let hay = "Mary had a little lamb";
457	/// let got: Vec<&str> = re.split(hay).collect();
458	/// assert_eq!(got, vec!["Mary", "had", "a", "little", "lamb"]);
459	///
460	/// let re = Regex::new(r"X").unwrap();
461	/// let hay = "";
462	/// let got: Vec<&str> = re.split(hay).collect();
463	/// assert_eq!(got, vec![""]);
464	///
465	/// let re = Regex::new(r"X").unwrap();
466	/// let hay = "lionXXtigerXleopard";
467	/// let got: Vec<&str> = re.split(hay).collect();
468	/// assert_eq!(got, vec!["lion", "", "tiger", "leopard"]);
469	///
470	/// let re = Regex::new(r"::").unwrap();
471	/// let hay = "lion::tiger::leopard";
472	/// let got: Vec<&str> = re.split(hay).collect();
473	/// assert_eq!(got, vec!["lion", "tiger", "leopard"]);
474	/// ```
475	///
476	/// If a haystack contains multiple contiguous matches, you will end up
477	/// with empty spans yielded by the iterator:
478	///
479	/// ```
480	/// use regex_lite::Regex;
481	///
482	/// let re = Regex::new(r"X").unwrap();
483	/// let hay = "XXXXaXXbXc";
484	/// let got: Vec<&str> = re.split(hay).collect();
485	/// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);
486	///
487	/// let re = Regex::new(r"/").unwrap();
488	/// let hay = "(///)";
489	/// let got: Vec<&str> = re.split(hay).collect();
490	/// assert_eq!(got, vec!["(", "", "", ")"]);
491	/// ```
492	///
/// Separators at the start or end of a haystack are neighbored by empty
/// substrings.
495	///
496	/// ```
497	/// use regex_lite::Regex;
498	///
499	/// let re = Regex::new(r"0").unwrap();
500	/// let hay = "010";
501	/// let got: Vec<&str> = re.split(hay).collect();
502	/// assert_eq!(got, vec!["", "1", ""]);
503	/// ```
504	///
505	/// When the empty string is used as a regex, it splits at every valid
506	/// UTF-8 boundary by default (which includes the beginning and end of the
507	/// haystack):
508	///
509	/// ```
510	/// use regex_lite::Regex;
511	///
512	/// let re = Regex::new(r"").unwrap();
513	/// let hay = "rust";
514	/// let got: Vec<&str> = re.split(hay).collect();
515	/// assert_eq!(got, vec!["", "r", "u", "s", "t", ""]);
516	///
517	/// // Splitting by an empty string is UTF-8 aware by default!
518	/// let re = Regex::new(r"").unwrap();
519	/// let hay = "☃";
520	/// let got: Vec<&str> = re.split(hay).collect();
521	/// assert_eq!(got, vec!["", "☃", ""]);
522	/// ```
523	///
524	/// Contiguous separators (commonly shows up with whitespace), can lead to
525	/// possibly surprising behavior. For example, this code is correct:
526	///
527	/// ```
528	/// use regex_lite::Regex;
529	///
530	/// let re = Regex::new(r" ").unwrap();
/// let hay = "    a  b c";
532	/// let got: Vec<&str> = re.split(hay).collect();
533	/// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);
534	/// ```
535	///
/// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want
537	/// to match contiguous space characters:
538	///
539	/// ```
540	/// use regex_lite::Regex;
541	///
542	/// let re = Regex::new(r" +").unwrap();
543	/// let hay = " a b c";
544	/// let got: Vec<&str> = re.split(hay).collect();
545	/// // N.B. This does still include a leading empty span because ' +'
546	/// // matches at the beginning of the haystack.
547	/// assert_eq!(got, vec!["", "a", "b", "c"]);
548	/// ```
549	#[inline]
550	pub fn split<'r, 'h>(&'r self, haystack: &'h str) -> Split<'r, 'h> {
551	Split { haystack, finder: self.find_iter(haystack), last: `0` }
552	}
553
554	/// Returns an iterator of at most `limit` substrings of the haystack
555	/// given, delimited by a match of the regex. (A `limit` of `0` will return
556	/// no substrings.) Namely, each element of the iterator corresponds to a
/// part of the haystack that *isn't* matched by the regular expression.
558	/// The remainder of the haystack that is not split will be the last
559	/// element in the iterator.
560	///
561	/// # Time complexity
562	///
563	/// Since iterators over all matches requires running potentially many
564	/// searches on the haystack, and since each search has worst case
/// `O(m * n)` time complexity, the overall worst case time complexity for
/// this routine is `O(m * n^2)`.
567	///
568	/// Although note that the worst case time here has an upper bound given
569	/// by the `limit` parameter.
570	///
571	/// # Example
572	///
573	/// Get the first two words in some haystack:
574	///
575	/// ```
576	/// use regex_lite::Regex;
577	///
578	/// let re = Regex::new(r"\W+").unwrap();
579	/// let hay = "Hey! How are you?";
/// let fields: Vec<&str> = re.splitn(hay, 3).collect();
581	/// assert_eq!(fields, vec!["Hey", "How", "are you?"]);
582	/// ```
583	///
584	/// # Examples: more cases
585	///
586	/// ```
587	/// use regex_lite::Regex;
588	///
589	/// let re = Regex::new(r" ").unwrap();
590	/// let hay = "Mary had a little lamb";
/// let got: Vec<&str> = re.splitn(hay, 3).collect();
/// assert_eq!(got, vec!["Mary", "had", "a little lamb"]);
///
/// let re = Regex::new(r"X").unwrap();
/// let hay = "";
/// let got: Vec<&str> = re.splitn(hay, 3).collect();
/// assert_eq!(got, vec![""]);
///
/// let re = Regex::new(r"X").unwrap();
/// let hay = "lionXXtigerXleopard";
/// let got: Vec<&str> = re.splitn(hay, 3).collect();
/// assert_eq!(got, vec!["lion", "", "tigerXleopard"]);
///
/// let re = Regex::new(r"::").unwrap();
/// let hay = "lion::tiger::leopard";
/// let got: Vec<&str> = re.splitn(hay, 2).collect();
/// assert_eq!(got, vec!["lion", "tiger::leopard"]);
///
/// let re = Regex::new(r"X").unwrap();
/// let hay = "abcXdef";
/// let got: Vec<&str> = re.splitn(hay, 1).collect();
/// assert_eq!(got, vec!["abcXdef"]);
///
/// let re = Regex::new(r"X").unwrap();
/// let hay = "abcdef";
/// let got: Vec<&str> = re.splitn(hay, 2).collect();
/// assert_eq!(got, vec!["abcdef"]);
///
/// let re = Regex::new(r"X").unwrap();
/// let hay = "abcXdef";
/// let got: Vec<&str> = re.splitn(hay, 0).collect();
622	/// assert!(got.is_empty());
623	/// ```
624	#[inline]
625	pub fn splitn<'r, 'h>(
626	&'r self,
627	haystack: &'h str,
628	limit: usize,
629	) -> SplitN<'r, 'h> {
630	SplitN { splits: self.split(haystack), limit }
631	}
632
633	/// Replaces the leftmost-first match in the given haystack with the
634	/// replacement provided. The replacement can be a regular string (where
635	/// `$N` and `$name` are expanded to match capture groups) or a function
636	/// that takes a [`Captures`] and returns the replaced string.
637	///
638	/// If no match is found, then the haystack is returned unchanged. In that
639	/// case, this implementation will likely return a `Cow::Borrowed` value
640	/// such that no allocation is performed.
641	///
642	/// # Replacement string syntax
643	///
644	/// All instances of `$ref` in the replacement string are replaced with
645	/// the substring corresponding to the capture group identified by `ref`.
646	///
647	/// `ref` may be an integer corresponding to the index of the capture group
648	/// (counted by order of opening parenthesis where `0` is the entire match)
649	/// or it can be a name (consisting of letters, digits or underscores)
650	/// corresponding to a named capture group.
651	///
652	/// If `ref` isn't a valid capture group (whether the name doesn't exist or
653	/// isn't a valid index), then it is replaced with the empty string.
654	///
655	/// The longest possible name is used. For example, `$1a` looks up the
656	/// capture group named `1a` and not the capture group at index `1`. To
657	/// exert more precise control over the name, use braces, e.g., `${1}a`.
658	///
659	/// To write a literal `$` use `$$`.
660	///
661	/// # Example
662	///
663	/// Note that this function is polymorphic with respect to the replacement.
664	/// In typical usage, this can just be a normal string:
665	///
666	/// ```
667	/// use regex_lite::Regex;
668	///
669	/// let re = Regex::new(r"[^01]+").unwrap();
670	/// assert_eq!(re.replace("1078910", ""), "1010");
671	/// ```
672	///
673	/// But anything satisfying the [`Replacer`] trait will work. For example,
/// a closure of type `|&Captures| -> String` provides direct access to the
675	/// captures corresponding to a match. This allows one to access capturing
676	/// group matches easily:
677	///
678	/// ```
679	/// use regex_lite::{Captures, Regex};
680	///
681	/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
///     format!("{} {}", &caps[2], &caps[1])
/// });
685	/// assert_eq!(result, "Bruce Springsteen");
686	/// ```
687	///
688	/// But this is a bit cumbersome to use all the time. Instead, a simple
689	/// syntax is supported (as described above) that expands `$name` into the
690	/// corresponding capture group. Here's the last example, but using this
691	/// expansion technique with named capture groups:
692	///
693	/// ```
694	/// use regex_lite::Regex;
695	///
696	/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap();
697	/// let result = re.replace("Springsteen, Bruce", "$first $last");
698	/// assert_eq!(result, "Bruce Springsteen");
699	/// ```
700	///
701	/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
702	/// would produce the same result. To write a literal `$` use `$$`.
703	///
704	/// Sometimes the replacement string requires use of curly braces to
705	/// delineate a capture group replacement when it is adjacent to some other
706	/// literal text. For example, if we wanted to join two words together with
707	/// an underscore:
708	///
709	/// ```
710	/// use regex_lite::Regex;
711	///
712	/// let re = Regex::new(r"(?<first>\w+)\s+(?<second>\w+)").unwrap();
713	/// let result = re.replace("deep fried", "${first}_$second");
714	/// assert_eq!(result, "deep_fried");
715	/// ```
716	///
717	/// Without the curly braces, the capture group name `first_` would be
718	/// used, and since it doesn't exist, it would be replaced with the empty
719	/// string.
720	///
721	/// Finally, sometimes you just want to replace a literal string with no
722	/// regard for capturing group expansion. This can be done by wrapping a
723	/// string with [`NoExpand`]:
724	///
725	/// ```
726	/// use regex_lite::{NoExpand, Regex};
727	///
728	/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap();
729	/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
730	/// assert_eq!(result, "$2 $last");
731	/// ```
732	///
733	/// Using `NoExpand` may also be faster, since the replacement string won't
734	/// need to be parsed for the `$` syntax.
735	#[inline]
736	pub fn replace<'h, R: Replacer>(
737	&self,
738	haystack: &'h str,
739	rep: R,
740	) -> Cow<'h, str> {
741	self.replacen(haystack, `1`, rep)
742	}
743
744	/// Replaces all non-overlapping matches in the haystack with the
745	/// replacement provided. This is the same as calling `replacen` with
746	/// `limit` set to `0`.
747	///
748	/// The documentation for [`Regex::replace`] goes into more detail about
749	/// what kinds of replacement strings are supported.
750	///
751	/// # Time complexity
752	///
753	/// Since iterators over all matches requires running potentially many
754	/// searches on the haystack, and since each search has worst case
/// `O(m * n)` time complexity, the overall worst case time complexity for
/// this routine is `O(m * n^2)`.
757	///
758	/// # Fallibility
759	///
760	/// If you need to write a replacement routine where any individual
761	/// replacement might "fail," doing so with this API isn't really feasible
762	/// because there's no way to stop the search process if a replacement
763	/// fails. Instead, if you need this functionality, you should consider
764	/// implementing your own replacement routine:
765	///
766	/// ```
767	/// use regex_lite::{Captures, Regex};
768	///
/// fn replace_all<E>(
///     re: &Regex,
///     haystack: &str,
///     replacement: impl Fn(&Captures) -> Result<String, E>,
/// ) -> Result<String, E> {
///     let mut new = String::with_capacity(haystack.len());
///     let mut last_match = 0;
///     for caps in re.captures_iter(haystack) {
///         let m = caps.get(0).unwrap();
///         new.push_str(&haystack[last_match..m.start()]);
///         new.push_str(&replacement(&caps)?);
///         last_match = m.end();
///     }
///     new.push_str(&haystack[last_match..]);
///     Ok(new)
/// }
///
/// // Let's replace each word with the number of bytes in that word.
/// // But if we see a word that is "too long," we'll give up.
/// let re = Regex::new(r"\w+").unwrap();
/// let replacement = |caps: &Captures| -> Result<String, &'static str> {
///     if caps[0].len() >= 5 {
///         return Err("word too long");
///     }
///     Ok(caps[0].len().to_string())
/// };
795	/// assert_eq!(
796	/// Ok("2 3 3 3?".to_string()),
797	/// replace_all(&re, "hi how are you?", &replacement),
798	/// );
799	/// assert!(replace_all(&re, "hi there", &replacement).is_err());
800	/// ```
801	///
802	/// # Example
803	///
804	/// This example shows how to flip the order of whitespace delimited
805	/// fields, and normalizes the whitespace that delimits the fields:
806	///
807	/// ```
808	/// use regex_lite::Regex;
809	///
810	/// let re = Regex::new(r"(?m)^(\S+)\s+(\S+)$").unwrap();
811	/// let hay = "
812	/// Greetings 1973
/// Wild\t1973
/// BornToRun\t\t\t\t1975
815	/// Darkness 1978
816	/// TheRiver 1980
817	/// ";
818	/// let new = re.replace_all(hay, "$2 $1");
819	/// assert_eq!(new, "
820	/// 1973 Greetings
821	/// 1973 Wild
822	/// 1975 BornToRun
823	/// 1978 Darkness
824	/// 1980 TheRiver
825	/// ");
826	/// ```
827	#[inline]
828	pub fn replace_all<'h, R: Replacer>(
829	&self,
830	haystack: &'h str,
831	rep: R,
832	) -> Cow<'h, str> {
833	self.replacen(haystack, `0`, rep)
834	}
835
836	/// Replaces at most `limit` non-overlapping matches in the haystack with
837	/// the replacement provided. If `limit` is `0`, then all non-overlapping
838	/// matches are replaced. That is, `Regex::replace_all(hay, rep)` is
839	/// equivalent to `Regex::replacen(hay, 0, rep)`.
840	///
841	/// The documentation for [`Regex::replace`] goes into more detail about
842	/// what kinds of replacement strings are supported.
843	///
844	/// # Time complexity
845	///
846	/// Since iterators over all matches requires running potentially many
847	/// searches on the haystack, and since each search has worst case
/// `O(m * n)` time complexity, the overall worst case time complexity for
/// this routine is `O(m * n^2)`.
850	///
851	/// Although note that the worst case time here has an upper bound given
852	/// by the `limit` parameter.
853	///
854	/// # Fallibility
855	///
856	/// See the corresponding section in the docs for [`Regex::replace_all`]
857	/// for tips on how to deal with a replacement routine that can fail.
858	///
859	/// # Example
860	///
861	/// This example shows how to flip the order of whitespace delimited
862	/// fields, and normalizes the whitespace that delimits the fields. But we
863	/// only do it for the first two matches.
864	///
865	/// ```
866	/// use regex_lite::Regex;
867	///
868	/// let re = Regex::new(r"(?m)^(\S+)\s+(\S+)$").unwrap();
869	/// let hay = "
870	/// Greetings 1973
/// Wild\t1973
/// BornToRun\t\t\t\t1975
/// Darkness 1978
/// TheRiver 1980
/// ";
/// let new = re.replacen(hay, 2, "$2 $1");
/// assert_eq!(new, "
/// 1973 Greetings
/// 1973 Wild
/// BornToRun\t\t\t\t1975
881	/// Darkness 1978
882	/// TheRiver 1980
883	/// ");
884	/// ```
885	#[inline]
886	pub fn replacen<'h, R: Replacer>(
887	&self,
888	haystack: &'h str,
889	limit: usize,
890	mut rep: R,
891	) -> Cow<'h, str> {
892	// If we know that the replacement doesn't have any capture expansions,
893	// then we can use the fast path. The fast path can make a tremendous
894	// difference:
895	//
896	// 1) We use `find_iter` instead of `captures_iter`. Not asking for
897	// captures generally makes the regex engines faster.
898	// 2) We don't need to look up all of the capture groups and do
899	// replacements inside the replacement string. We just push it
900	// at each match and be done with it.
901	if let Some(rep) = rep.no_expansion() {
902	let mut it = self.find_iter(haystack).enumerate().peekable();
903	if it.peek().is_none() {
904	return Cow::Borrowed(haystack);
905	}
906	let mut new = String::with_capacity(haystack.len());
907	let mut last_match = `0`;
908	for (i, m) in it {
909	new.push_str(&haystack[last_match..m.start()]);
910	new.push_str(&rep);
911	last_match = m.end();
912	if limit > `0` && i >= limit - `1` {
913	break;
914	}
915	}
916	new.push_str(&haystack[last_match..]);
917	return Cow::Owned(new);
918	}
919
920	// The slower path, which we use if the replacement needs access to
921	// capture groups.
922	let mut it = self.captures_iter(haystack).enumerate().peekable();
923	if it.peek().is_none() {
924	return Cow::Borrowed(haystack);
925	}
926	let mut new = String::with_capacity(haystack.len());
927	let mut last_match = `0`;
928	for (i, cap) in it {
929	// unwrap on 0 is OK because captures only reports matches
930	let m = cap.get(`0`).unwrap();
931	new.push_str(&haystack[last_match..m.start()]);
932	rep.replace_append(&cap, &mut new);
933	last_match = m.end();
934	if limit > `0` && i >= limit - `1` {
935	break;
936	}
937	}
938	new.push_str(&haystack[last_match..]);
939	Cow::Owned(new)
940	}
941	}
942
943	/// A group of advanced or "lower level" search methods. Some methods permit
944	/// starting the search at a position greater than `0` in the haystack. Other
945	/// methods permit reusing allocations, for example, when extracting the
946	/// matches for capture groups.
947	impl Regex {
948	/// Returns the end byte offset of the first match in the haystack given.
949	///
/// This method may have the same performance characteristics as
/// `is_match`. Behaviorally, it doesn't just report whether a match
/// occurs, but also the end offset for a match. In particular, the offset
/// returned *may be shorter* than the proper end of the leftmost-first
/// match that you would find via [`Regex::find`].
955	///
956	/// Note that it is not guaranteed that this routine finds the shortest or
957	/// "earliest" possible match. Instead, the main idea of this API is that
958	/// it returns the offset at the point at which the internal regex engine
959	/// has determined that a match has occurred. This may vary depending on
960	/// which internal regex engine is used, and thus, the offset itself may
961	/// change based on internal heuristics.
962	///
963	/// # Example
964	///
/// Typically, `a+` would match the entire first sequence of `a` in some
/// haystack, but `shortest_match` *may* give up as soon as it sees the
/// first `a`.
968	///
969	/// ```
970	/// use regex_lite::Regex;
971	///
972	/// let re = Regex::new(r"a+").unwrap();
973	/// let offset = re.shortest_match("aaaaa").unwrap();
974	/// assert_eq!(offset, `1`);
975	/// ```
976	#[inline]
977	pub fn shortest_match(&self, haystack: &str) -> Option<usize> {
978	self.shortest_match_at(haystack, `0`)
979	}
980
981	/// Returns the same as [`Regex::shortest_match`], but starts the search at
982	/// the given offset.
983	///
984	/// The significance of the starting point is that it takes the surrounding
985	/// context into consideration. For example, the `\A` anchor can only match
986	/// when `start == 0`.
987	///
988	/// If a match is found, the offset returned is relative to the beginning
989	/// of the haystack, not the beginning of the search.
990	///
991	/// # Panics
992	///
993	/// This panics when `start >= haystack.len() + 1`.
994	///
995	/// # Example
996	///
997	/// This example shows the significance of `start` by demonstrating how it
998	/// can be used to permit look-around assertions in a regex to take the
999	/// surrounding context into account.
1000	///
1001	/// ```
1002	/// use regex_lite::Regex;
1003	///
1004	/// let re = Regex::new(r"\bchew\b").unwrap();
1005	/// let hay = "eschew";
1006	/// // We get a match here, but it's probably not intended.
1007	/// assert_eq!(re.shortest_match(&hay[`2`..]), Some(`4`));
1008	/// // No match because the assertions take the context into account.
1009	/// assert_eq!(re.shortest_match_at(hay, `2`), None);
1010	/// ```
1011	#[inline]
1012	pub fn shortest_match_at(
1013	&self,
1014	haystack: &str,
1015	start: usize,
1016	) -> Option<usize> {
1017	let mut cache = self.pool.get();
1018	let mut slots = [None, None];
1019	let matched = self.pikevm.search(
1020	&mut cache,
1021	haystack.as_bytes(),
1022	start,
1023	haystack.len(),
1024	`true`,
1025	&mut slots,
1026	);
1027	if !matched {
1028	return None;
1029	}
1030	Some(slots[`1`].unwrap().get())
1031	}
1032
1033	/// Returns the same as [`Regex::is_match`], but starts the search at the
1034	/// given offset.
1035	///
1036	/// The significance of the starting point is that it takes the surrounding
1037	/// context into consideration. For example, the `\A` anchor can only
1038	/// match when `start == 0`.
1039	///
1040	/// # Panics
1041	///
1042	/// This panics when `start >= haystack.len() + 1`.
1043	///
1044	/// # Example
1045	///
1046	/// This example shows the significance of `start` by demonstrating how it
1047	/// can be used to permit look-around assertions in a regex to take the
1048	/// surrounding context into account.
1049	///
1050	/// ```
1051	/// use regex_lite::Regex;
1052	///
1053	/// let re = Regex::new(r"\bchew\b").unwrap();
1054	/// let hay = "eschew";
1055	/// // We get a match here, but it's probably not intended.
1056	/// assert!(re.is_match(&hay[`2`..]));
1057	/// // No match because the assertions take the context into account.
1058	/// assert!(!re.is_match_at(hay, `2`));
1059	/// ```
1060	#[inline]
1061	pub fn is_match_at(&self, haystack: &str, start: usize) -> bool {
1062	let mut cache = self.pool.get();
1063	self.pikevm.search(
1064	&mut cache,
1065	haystack.as_bytes(),
1066	start,
1067	haystack.len(),
1068	`true`,
1069	&mut [],
1070	)
1071	}
1072
1073	/// Returns the same as [`Regex::find`], but starts the search at the given
1074	/// offset.
1075	///
1076	/// The significance of the starting point is that it takes the surrounding
1077	/// context into consideration. For example, the `\A` anchor can only
1078	/// match when `start == 0`.
1079	///
1080	/// # Panics
1081	///
1082	/// This panics when `start >= haystack.len() + 1`.
1083	///
1084	/// # Example
1085	///
1086	/// This example shows the significance of `start` by demonstrating how it
1087	/// can be used to permit look-around assertions in a regex to take the
1088	/// surrounding context into account.
1089	///
1090	/// ```
1091	/// use regex_lite::Regex;
1092	///
1093	/// let re = Regex::new(r"\bchew\b").unwrap();
1094	/// let hay = "eschew";
1095	/// // We get a match here, but it's probably not intended.
1096	/// assert_eq!(re.find(&hay[`2`..]).map(\|m\| m.range()), Some(`0`..`4`));
1097	/// // No match because the assertions take the context into account.
1098	/// assert_eq!(re.find_at(hay, `2`), None);
1099	/// ```
1100	#[inline]
1101	pub fn find_at<'h>(
1102	&self,
1103	haystack: &'h str,
1104	start: usize,
1105	) -> Option<Match<'h>> {
1106	let mut cache = self.pool.get();
1107	let mut slots = [None, None];
1108	let matched = self.pikevm.search(
1109	&mut cache,
1110	haystack.as_bytes(),
1111	start,
1112	haystack.len(),
1113	`false`,
1114	&mut slots,
1115	);
1116	if !matched {
1117	return None;
1118	}
1119	let (start, end) = (slots[`0`].unwrap().get(), slots[`1`].unwrap().get());
1120	Some(Match::new(haystack, start, end))
1121	}
1122
1123	/// Returns the same as [`Regex::captures`], but starts the search at the
1124	/// given offset.
1125	///
1126	/// The significance of the starting point is that it takes the surrounding
1127	/// context into consideration. For example, the `\A` anchor can only
1128	/// match when `start == 0`.
1129	///
1130	/// # Panics
1131	///
1132	/// This panics when `start >= haystack.len() + 1`.
1133	///
1134	/// # Example
1135	///
1136	/// This example shows the significance of `start` by demonstrating how it
1137	/// can be used to permit look-around assertions in a regex to take the
1138	/// surrounding context into account.
1139	///
1140	/// ```
1141	/// use regex_lite::Regex;
1142	///
1143	/// let re = Regex::new(r"\bchew\b").unwrap();
1144	/// let hay = "eschew";
1145	/// // We get a match here, but it's probably not intended.
1146	/// assert_eq!(&re.captures(&hay[`2`..]).unwrap()[`0`], "chew");
1147	/// // No match because the assertions take the context into account.
1148	/// assert!(re.captures_at(hay, `2`).is_none());
1149	/// ```
1150	#[inline]
1151	pub fn captures_at<'h>(
1152	&self,
1153	haystack: &'h str,
1154	start: usize,
1155	) -> Option<Captures<'h>> {
1156	let mut caps = Captures {
1157	haystack,
1158	slots: self.capture_locations(),
1159	pikevm: Arc::clone(&self.pikevm),
1160	};
1161	let mut cache = self.pool.get();
1162	let matched = self.pikevm.search(
1163	&mut cache,
1164	haystack.as_bytes(),
1165	start,
1166	haystack.len(),
1167	`false`,
1168	&mut caps.slots.0,
1169	);
1170	if !matched {
1171	return None;
1172	}
1173	Some(caps)
1174	}
1175
1176	/// This is like [`Regex::captures`], but writes the byte offsets of each
1177	/// capture group match into the locations given.
1178	///
/// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`],
/// but *does not* store a reference to the haystack. This makes its API
/// a bit lower level and less convenient. But in exchange, callers
1182	/// may allocate their own `CaptureLocations` and reuse it for multiple
1183	/// searches. This may be helpful if allocating a `Captures` shows up in a
1184	/// profile as too costly.
1185	///
1186	/// To create a `CaptureLocations` value, use the
1187	/// [`Regex::capture_locations`] method.
1188	///
1189	/// This also returns the overall match if one was found. When a match is
1190	/// found, its offsets are also always stored in `locs` at index `0`.
1191	///
1192	/// # Panics
1193	///
1194	/// This routine may panic if the given `CaptureLocations` was not created
1195	/// by this regex.
1196	///
1197	/// # Example
1198	///
1199	/// ```
1200	/// use regex_lite::Regex;
1201	///
1202	/// let re = Regex::new(r"^([a-z]+)=(\S*)$").unwrap();
1203	/// let mut locs = re.capture_locations();
1204	/// assert!(re.captures_read(&mut locs, "id=foo123").is_some());
1205	/// assert_eq!(Some((`0`, `9`)), locs.get(`0`));
1206	/// assert_eq!(Some((`0`, `2`)), locs.get(`1`));
1207	/// assert_eq!(Some((`3`, `9`)), locs.get(`2`));
1208	/// ```
1209	#[inline]
1210	pub fn captures_read<'h>(
1211	&self,
1212	locs: &mut CaptureLocations,
1213	haystack: &'h str,
1214	) -> Option<Match<'h>> {
1215	self.captures_read_at(locs, haystack, `0`)
1216	}
1217
1218	/// Returns the same as [`Regex::captures_read`], but starts the search at
1219	/// the given offset.
1220	///
1221	/// The significance of the starting point is that it takes the surrounding
1222	/// context into consideration. For example, the `\A` anchor can only
1223	/// match when `start == 0`.
1224	///
1225	/// # Panics
1226	///
1227	/// This panics when `start >= haystack.len() + 1`.
1228	///
1229	/// This routine may also panic if the given `CaptureLocations` was not
1230	/// created by this regex.
1231	///
1232	/// # Example
1233	///
1234	/// This example shows the significance of `start` by demonstrating how it
1235	/// can be used to permit look-around assertions in a regex to take the
1236	/// surrounding context into account.
1237	///
1238	/// ```
1239	/// use regex_lite::Regex;
1240	///
1241	/// let re = Regex::new(r"\bchew\b").unwrap();
1242	/// let hay = "eschew";
1243	/// let mut locs = re.capture_locations();
1244	/// // We get a match here, but it's probably not intended.
1245	/// assert!(re.captures_read(&mut locs, &hay[`2`..]).is_some());
1246	/// // No match because the assertions take the context into account.
1247	/// assert!(re.captures_read_at(&mut locs, hay, `2`).is_none());
1248	/// ```
1249	#[inline]
1250	pub fn captures_read_at<'h>(
1251	&self,
1252	locs: &mut CaptureLocations,
1253	haystack: &'h str,
1254	start: usize,
1255	) -> Option<Match<'h>> {
1256	let mut cache = self.pool.get();
1257	let matched = self.pikevm.search(
1258	&mut cache,
1259	haystack.as_bytes(),
1260	start,
1261	haystack.len(),
1262	`false`,
1263	&mut locs.0,
1264	);
1265	if !matched {
1266	return None;
1267	}
1268	let (start, end) = locs.get(`0`).unwrap();
1269	Some(Match::new(haystack, start, end))
1270	}
1271	}
1272
1273	/// Auxiliary methods.
1274	impl Regex {
1275	/// Returns the original string of this regex.
1276	///
1277	/// # Example
1278	///
1279	/// ```
1280	/// use regex_lite::Regex;
1281	///
1282	/// let re = Regex::new(r"foo\w+bar").unwrap();
1283	/// assert_eq!(re.as_str(), r"foo\w+bar");
1284	/// ```
1285	#[inline]
1286	pub fn as_str(&self) -> &str {
1287	&self.pikevm.nfa().pattern()
1288	}
1289
1290	/// Returns an iterator over the capture names in this regex.
1291	///
1292	/// The iterator returned yields elements of type `Option<&str>`. That is,
1293	/// the iterator yields values for all capture groups, even ones that are
1294	/// unnamed. The order of the groups corresponds to the order of the group's
1295	/// corresponding opening parenthesis.
1296	///
1297	/// The first element of the iterator always yields the group corresponding
1298	/// to the overall match, and this group is always unnamed. Therefore, the
1299	/// iterator always yields at least one group.
1300	///
1301	/// # Example
1302	///
1303	/// This shows basic usage with a mix of named and unnamed capture groups:
1304	///
1305	/// ```
1306	/// use regex_lite::Regex;
1307	///
1308	/// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap();
1309	/// let mut names = re.capture_names();
1310	/// assert_eq!(names.next(), Some(None));
1311	/// assert_eq!(names.next(), Some(Some("a")));
1312	/// assert_eq!(names.next(), Some(Some("b")));
1313	/// assert_eq!(names.next(), Some(None));
1314	/// // the '(?:.)' group is non-capturing and so doesn't appear here!
1315	/// assert_eq!(names.next(), Some(Some("c")));
1316	/// assert_eq!(names.next(), None);
1317	/// ```
1318	///
1319	/// The iterator always yields at least one element, even for regexes with
1320	/// no capture groups and even for regexes that can never match:
1321	///
1322	/// ```
1323	/// use regex_lite::Regex;
1324	///
1325	/// let re = Regex::new(r"").unwrap();
1326	/// let mut names = re.capture_names();
1327	/// assert_eq!(names.next(), Some(None));
1328	/// assert_eq!(names.next(), None);
1329	///
1330	/// let re = Regex::new(r"[^\s\S]").unwrap();
1331	/// let mut names = re.capture_names();
1332	/// assert_eq!(names.next(), Some(None));
1333	/// assert_eq!(names.next(), None);
1334	/// ```
1335	#[inline]
1336	pub fn capture_names(&self) -> CaptureNames<'_> {
1337	CaptureNames(self.pikevm.nfa().capture_names())
1338	}
1339
/// Returns the number of capture groups in this regex.
1341	///
1342	/// This includes all named and unnamed groups, including the implicit
1343	/// unnamed group that is always present and corresponds to the entire
1344	/// match.
1345	///
1346	/// Since the implicit unnamed group is always included in this length, the
1347	/// length returned is guaranteed to be greater than zero.
1348	///
1349	/// # Example
1350	///
1351	/// ```
1352	/// use regex_lite::Regex;
1353	///
1354	/// let re = Regex::new(r"foo").unwrap();
1355	/// assert_eq!(`1`, re.captures_len());
1356	///
1357	/// let re = Regex::new(r"(foo)").unwrap();
1358	/// assert_eq!(`2`, re.captures_len());
1359	///
1360	/// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap();
1361	/// assert_eq!(`5`, re.captures_len());
1362	///
1363	/// let re = Regex::new(r"[^\s\S]").unwrap();
1364	/// assert_eq!(`1`, re.captures_len());
1365	/// ```
1366	#[inline]
1367	pub fn captures_len(&self) -> usize {
1368	self.pikevm.nfa().group_len()
1369	}
1370
1371	/// Returns the total number of capturing groups that appear in every
1372	/// possible match.
1373	///
1374	/// If the number of capture groups can vary depending on the match, then
1375	/// this returns `None`. That is, a value is only returned when the number
1376	/// of matching groups is invariant or "static."
1377	///
/// Note that like [`Regex::captures_len`], this *does* include the
/// implicit capturing group corresponding to the entire match. Therefore,
/// when a non-None value is returned, it is guaranteed to be at least `1`.
1381	/// Stated differently, a return value of `Some(0)` is impossible.
1382	///
1383	/// # Example
1384	///
1385	/// This shows a few cases where a static number of capture groups is
1386	/// available and a few cases where it is not.
1387	///
1388	/// ```
1389	/// use regex_lite::Regex;
1390	///
/// let len = |pattern| {
///     Regex::new(pattern).map(|re| re.static_captures_len())
/// };
///
/// assert_eq!(Some(1), len("a")?);
/// assert_eq!(Some(2), len("(a)")?);
/// assert_eq!(Some(2), len("(a)|(b)")?);
/// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?);
/// assert_eq!(None, len("(a)|b")?);
/// assert_eq!(None, len("a|(b)")?);
/// assert_eq!(None, len("(b)*")?);
/// assert_eq!(Some(2), len("(b)+")?);
1403	///
1404	/// # Ok::<(), Box<dyn std::error::Error>>(())
1405	/// ```
1406	#[inline]
1407	pub fn static_captures_len(&self) -> Option<usize> {
1408	self.pikevm
1409	.nfa()
1410	.static_explicit_captures_len()
1411	.map(\|len\| len.saturating_add(`1`))
1412	}
1413
/// Returns a freshly allocated set of capture locations that can
/// be reused in multiple calls to [`Regex::captures_read`] or
/// [`Regex::captures_read_at`].
1417	///
1418	/// The returned locations can be used for any subsequent search for this
1419	/// particular regex. There is no guarantee that it is correct to use for
1420	/// other regexes, even if they have the same number of capture groups.
1421	///
1422	/// # Example
1423	///
1424	/// ```
1425	/// use regex_lite::Regex;
1426	///
1427	/// let re = Regex::new(r"(.)(.)(\w+)").unwrap();
1428	/// let mut locs = re.capture_locations();
1429	/// assert!(re.captures_read(&mut locs, "Padron").is_some());
1430	/// assert_eq!(locs.get(`0`), Some((`0`, `6`)));
1431	/// assert_eq!(locs.get(`1`), Some((`0`, `1`)));
1432	/// assert_eq!(locs.get(`2`), Some((`1`, `2`)));
1433	/// assert_eq!(locs.get(`3`), Some((`2`, `6`)));
1434	/// ```
1435	#[inline]
1436	pub fn capture_locations(&self) -> CaptureLocations {
1437	// OK because NFA construction would have failed if this overflowed.
1438	let len = self.pikevm.nfa().group_len().checked_mul(`2`).unwrap();
1439	CaptureLocations(vec![None; len])
1440	}
1441	}
1442
1443	/// Represents a single match of a regex in a haystack.
1444	///
1445	/// A `Match` contains both the start and end byte offsets of the match and the
1446	/// actual substring corresponding to the range of those byte offsets. It is
1447	/// guaranteed that `start <= end`. When `start == end`, the match is empty.
1448	///
1449	/// Since this `Match` can only be produced by the top-level `Regex` APIs
1450	/// that only support searching UTF-8 encoded strings, the byte offsets for a
1451	/// `Match` are guaranteed to fall on valid UTF-8 codepoint boundaries. That
1452	/// is, slicing a `&str` with [`Match::range`] is guaranteed to never panic.
1453	///
1454	/// Values with this type are created by [`Regex::find`] or
1455	/// [`Regex::find_iter`]. Other APIs can create `Match` values too. For
1456	/// example, [`Captures::get`].
1457	///
/// The lifetime parameter `'h` refers to the lifetime of the haystack that
/// this match was produced from.
1460	///
1461	/// # Numbering
1462	///
1463	/// The byte offsets in a `Match` form a half-open interval. That is, the
1464	/// start of the range is inclusive and the end of the range is exclusive.
1465	/// For example, given a haystack `abcFOOxyz` and a match of `FOO`, its byte
1466	/// offset range starts at `3` and ends at `6`. `3` corresponds to `F` and
1467	/// `6` corresponds to `x`, which is one past the end of the match. This
1468	/// corresponds to the same kind of slicing that Rust uses.
1469	///
1470	/// For more on why this was chosen over other schemes (aside from being
1471	/// consistent with how Rust the language works), see [this discussion] and
1472	/// [Dijkstra's note on a related topic][note].
1473	///
1474	/// [this discussion]: https://github.com/rust-lang/regex/discussions/866
1475	/// [note]: https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html
1476	///
1477	/// # Example
1478	///
1479	/// This example shows the value of each of the methods on `Match` for a
1480	/// particular search.
1481	///
1482	/// ```
1483	/// use regex_lite::Regex;
1484	///
1485	/// let re = Regex::new(r"\d+").unwrap();
1486	/// let hay = "numbers: 1234";
1487	/// let m = re.find(hay).unwrap();
1488	/// assert_eq!(`9`, m.start());
1489	/// assert_eq!(`13`, m.end());
1490	/// assert!(!m.is_empty());
1491	/// assert_eq!(`4`, m.len());
1492	/// assert_eq!(`9`..`13`, m.range());
1493	/// assert_eq!("1234", m.as_str());
1494	/// ```
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Match<'h> {
    // The full haystack that was searched; `start..end` indexes into it.
    haystack: &'h str,
    // Inclusive starting byte offset of the match.
    start: usize,
    // Exclusive ending byte offset of the match.
    end: usize,
}

impl<'h> Match<'h> {
    /// Creates a new match from the given haystack and byte offsets.
    #[inline]
    fn new(haystack: &'h str, start: usize, end: usize) -> Match<'h> {
        Match { haystack, start, end }
    }

    /// Returns the byte offset of the start of the match in the haystack. The
    /// start of the match corresponds to the position where the match begins
    /// and includes the first byte in the match.
    ///
    /// It is guaranteed that `Match::start() <= Match::end()`.
    ///
    /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That
    /// is, it will never be an offset that appears between the UTF-8 code
    /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is
    /// always safe to slice the corresponding haystack using this offset.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the byte offset of the end of the match in the haystack. The
    /// end of the match corresponds to the byte immediately following the last
    /// byte in the match. This means that `&slice[start..end]` works as one
    /// would expect.
    ///
    /// It is guaranteed that `Match::start() <= Match::end()`.
    ///
    /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That
    /// is, it will never be an offset that appears between the UTF-8 code
    /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is
    /// always safe to slice the corresponding haystack using this offset.
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// Returns true if and only if this match has a length of zero.
    ///
    /// Note that an empty match can only occur when the regex itself can
    /// match the empty string. Here are some examples of regexes that can
    /// all match the empty string: `^`, `^$`, `\b`, `a?`, `a*`, `a{0}`,
    /// `(foo|\d+|quux)?`.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start == self.end
    }

    /// Returns the length, in bytes, of this match.
    #[inline]
    pub fn len(&self) -> usize {
        self.end - self.start
    }

    /// Returns the range over the starting and ending byte offsets of the
    /// match in the haystack.
    ///
    /// It is always correct to slice the original haystack searched with this
    /// range. That is, because the offsets are guaranteed to fall on valid
    /// UTF-8 boundaries, the range returned is always valid.
    #[inline]
    pub fn range(&self) -> core::ops::Range<usize> {
        self.start..self.end
    }

    /// Returns the substring of the haystack that matched.
    #[inline]
    pub fn as_str(&self) -> &'h str {
        &self.haystack[self.range()]
    }
}

/// Formats a match as `Match { start, end, string }`, where `string` is the
/// matched substring rather than the entire haystack.
impl<'h> core::fmt::Debug for Match<'h> {
    // `core::fmt` is used (not `std::fmt`) to stay consistent with the rest
    // of this file, which only relies on `alloc` and `core`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("Match")
            .field("start", &self.start)
            .field("end", &self.end)
            .field("string", &self.as_str())
            .finish()
    }
}

/// Converts a match into the substring of the haystack that matched.
impl<'h> From<Match<'h>> for &'h str {
    fn from(m: Match<'h>) -> &'h str {
        m.as_str()
    }
}

/// Converts a match into its half-open byte offset range.
impl<'h> From<Match<'h>> for core::ops::Range<usize> {
    fn from(m: Match<'h>) -> core::ops::Range<usize> {
        m.range()
    }
}
1596
1597	/// Represents the capture groups for a single match.
1598	///
1599	/// Capture groups refer to parts of a regex enclosed in parentheses. They can
1600	/// be optionally named. The purpose of capture groups is to be able to
1601	/// reference different parts of a match based on the original pattern. For
1602	/// example, say you want to match the individual letters in a 5-letter word:
1603	///
1604	/// ```text
1605	/// (?<first>\w)(\w)(?:\w)\w(?<last>\w)
1606	/// ```
1607	///
1608	/// This regex has 4 capture groups:
1609	///
/// * The group at index `0` corresponds to the overall match. It is always
///   present in every match and never has a name.
/// * The group at index `1` with name `first` corresponding to the first
///   letter.
/// * The group at index `2` with no name corresponding to the second letter.
/// * The group at index `3` with name `last` corresponding to the fifth and
///   last letter.
1617	///
/// Notice that `(?:\w)` was not listed above as a capture group despite it
/// being enclosed in parentheses. That's because `(?:pattern)` is a special
/// syntax that permits grouping but *without* capturing. The reason for not
1621	/// treating it as a capture is that tracking and reporting capture groups
1622	/// requires additional state that may lead to slower searches. So using as few
1623	/// capture groups as possible can help performance. (Although the difference
1624	/// in performance of a couple of capture groups is likely immaterial.)
1625	///
1626	/// Values with this type are created by [`Regex::captures`] or
1627	/// [`Regex::captures_iter`].
1628	///
1629	/// `'h` is the lifetime of the haystack that these captures were matched from.
1630	///
1631	/// # Example
1632	///
1633	/// ```
1634	/// use regex_lite::Regex;
1635	///
1636	/// let re = Regex::new(r"(?<first>\w)(\w)(?:\w)\w(?<last>\w)").unwrap();
1637	/// let caps = re.captures("toady").unwrap();
1638	/// assert_eq!("toady", &caps[`0`]);
1639	/// assert_eq!("t", &caps["first"]);
1640	/// assert_eq!("o", &caps[`2`]);
1641	/// assert_eq!("y", &caps["last"]);
1642	/// ```
1643	pub struct Captures<'h> {
1644	haystack: &'h str,
1645	slots: CaptureLocations,
1646	// It's a little weird to put the PikeVM in our Captures, but it's the
1647	// simplest thing to do and is cheap. The PikeVM gives us access to the
1648	// NFA and the NFA gives us access to the capture name<->index mapping.
1649	pikevm: Arc<PikeVM>,
1650	}
1651
1652	impl<'h> Captures<'h> {
1653	/// Returns the `Match` associated with the capture group at index `i`. If
1654	/// `i` does not correspond to a capture group, or if the capture group did
1655	/// not participate in the match, then `None` is returned.
1656	///
1657	/// When `i == 0`, this is guaranteed to return a non-`None` value.
1658	///
1659	/// # Examples
1660	///
1661	/// Get the substring that matched with a default of an empty string if the
1662	/// group didn't participate in the match:
1663	///
1664	/// ```
1665	/// use regex_lite::Regex;
1666	///
/// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
/// let caps = re.captures("abc123").unwrap();
///
/// let substr1 = caps.get(1).map_or("", |m| m.as_str());
/// let substr2 = caps.get(2).map_or("", |m| m.as_str());
1672	/// assert_eq!(substr1, "123");
1673	/// assert_eq!(substr2, "");
1674	/// ```
1675	#[inline]
1676	pub fn get(&self, i: usize) -> Option<Match<'h>> {
1677	self.slots.get(i).map(\|(s, e)\| Match::new(self.haystack, s, e))
1678	}
1679
1680	/// Returns the `Match` associated with the capture group named `name`. If
1681	/// `name` isn't a valid capture group or it refers to a group that didn't
1682	/// match, then `None` is returned.
1683	///
1684	/// Note that unlike `caps["name"]`, this returns a `Match` whose lifetime
1685	/// matches the lifetime of the haystack in this `Captures` value.
1686	/// Conversely, the substring returned by `caps["name"]` has a lifetime
1687	/// of the `Captures` value, which is likely shorter than the lifetime of
1688	/// the haystack. In some cases, it may be necessary to use this method to
1689	/// access the matching substring instead of the `caps["name"]` notation.
1690	///
1691	/// # Examples
1692	///
1693	/// Get the substring that matched with a default of an empty string if the
1694	/// group didn't participate in the match:
1695	///
1696	/// ```
1697	/// use regex_lite::Regex;
1698	///
1699	/// let re = Regex::new(
///     r"[a-z]+(?:(?<numbers>[0-9]+)|(?<letters>[A-Z]+))",
/// ).unwrap();
/// let caps = re.captures("abc123").unwrap();
///
/// let numbers = caps.name("numbers").map_or("", |m| m.as_str());
/// let letters = caps.name("letters").map_or("", |m| m.as_str());
1706	/// assert_eq!(numbers, "123");
1707	/// assert_eq!(letters, "");
1708	/// ```
1709	#[inline]
1710	pub fn name(&self, name: &str) -> Option<Match<'h>> {
1711	let i = self.pikevm.nfa().to_index(name)?;
1712	self.get(i)
1713	}
1714
1715	/// This is a convenience routine for extracting the substrings
1716	/// corresponding to matching capture groups.
1717	///
1718	/// This returns a tuple where the first element corresponds to the full
1719	/// substring of the haystack that matched the regex. The second element is
1720	/// an array of substrings, with each corresponding to the substring that
1721	/// matched for a particular capture group.
1722	///
1723	/// # Panics
1724	///
1725	/// This panics if the number of possible matching groups in this
1726	/// `Captures` value is not fixed to `N` in all circumstances.
1727	/// More precisely, this routine only works when `N` is equivalent to
1728	/// [`Regex::static_captures_len`].
1729	///
1730	/// Stated more plainly, if the number of matching capture groups in a
1731	/// regex can vary from match to match, then this function always panics.
1732	///
/// For example, `(a)(b)|(c)` could produce two matching capture groups
/// or one matching capture group for any given match. Therefore, one
/// cannot use `extract` with such a pattern.
1736	///
/// But a pattern like `(a)(b)|(c)(d)` can be used with `extract` because
/// the number of capture groups in every match is always equivalent,
/// even if the capture _indices_ in each match are not.
1740	///
1741	/// # Example
1742	///
1743	/// ```
1744	/// use regex_lite::Regex;
1745	///
1746	/// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
1747	/// let hay = "On 2010-03-14, I became a Tenneessee lamb.";
/// let Some((full, [year, month, day])) =
///     re.captures(hay).map(|caps| caps.extract()) else { return };
1750	/// assert_eq!("2010-03-14", full);
1751	/// assert_eq!("2010", year);
1752	/// assert_eq!("03", month);
1753	/// assert_eq!("14", day);
1754	/// ```
1755	///
1756	/// # Example: iteration
1757	///
1758	/// This example shows how to use this method when iterating over all
1759	/// `Captures` matches in a haystack.
1760	///
1761	/// ```
1762	/// use regex_lite::Regex;
1763	///
1764	/// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
1765	/// let hay = "1973-01-05, 1975-08-25 and 1980-10-18";
1766	///
1767	/// let mut dates: Vec<(&str, &str, &str)> = vec![];
/// for (_, [y, m, d]) in re.captures_iter(hay).map(|c| c.extract()) {
1769	/// dates.push((y, m, d));
1770	/// }
1771	/// assert_eq!(dates, vec![
1772	/// ("1973", "01", "05"),
1773	/// ("1975", "08", "25"),
1774	/// ("1980", "10", "18"),
1775	/// ]);
1776	/// ```
1777	///
1778	/// # Example: parsing different formats
1779	///
1780	/// This API is particularly useful when you need to extract a particular
1781	/// value that might occur in a different format. Consider, for example,
1782	/// an identifier that might be in double quotes or single quotes:
1783	///
1784	/// ```
1785	/// use regex_lite::Regex;
1786	///
/// let re = Regex::new(r#"id:(?:"([^"]+)"|'([^']+)')"#).unwrap();
1788	/// let hay = r#"The first is id:"foo" and the second is id:'bar'."#;
1789	/// let mut ids = vec![];
/// for (_, [id]) in re.captures_iter(hay).map(|c| c.extract()) {
1791	/// ids.push(id);
1792	/// }
1793	/// assert_eq!(ids, vec!["foo", "bar"]);
1794	/// ```
1795	pub fn extract<const N: usize>(&self) -> (&'h str, [&'h str; N]) {
1796	let len = self
1797	.pikevm
1798	.nfa()
1799	.static_explicit_captures_len()
1800	.expect("number of capture groups can vary in a match");
1801	assert_eq!(N, len, "asked for {} groups, but must ask for {}", N, len);
1802	let mut matched = self.iter().flatten();
1803	let whole_match = matched.next().expect("a match").as_str();
1804	let group_matches = [`0`; N].map(\|_\| {
1805	matched.next().expect("too few matching groups").as_str()
1806	});
1807	(whole_match, group_matches)
1808	}
1809
1810	/// Expands all instances of `$ref` in `replacement` to the corresponding
1811	/// capture group, and writes them to the `dst` buffer given. A `ref` can
1812	/// be a capture group index or a name. If `ref` doesn't refer to a capture
1813	/// group that participated in the match, then it is replaced with the
1814	/// empty string.
1815	///
1816	/// # Format
1817	///
1818	/// The format of the replacement string supports two different kinds of
/// For the unbraced format, the format supported is `$ref` where `ref`
/// can consist of any characters in the class `[0-9A-Za-z_]`. `ref` is always
1821	/// For the unbraced format, the format supported is `$ref` where `name`
1822	/// can be any character in the class `[0-9A-Za-z_]`. `ref` is always
1823	/// the longest possible parse. So for example, `$1a` corresponds to the
1824	/// capture group named `1a` and not the capture group at index `1`. If
1825	/// `ref` matches `^[0-9]+$`, then it is treated as a capture group index
1826	/// itself and not a name.
1827	///
1828	/// For the braced format, the format supported is `${ref}` where `ref` can
1829	/// be any sequence of bytes except for `}`. If no closing brace occurs,
1830	/// then it is not considered a capture reference. As with the unbraced
1831	/// format, if `ref` matches `^[0-9]+$`, then it is treated as a capture
1832	/// group index and not a name.
1833	///
1834	/// The braced format is useful for exerting precise control over the name
1835	/// of the capture reference. For example, `${1}a` corresponds to the
1836	/// capture group reference `1` followed by the letter `a`, where as `$1a`
1837	/// (as mentioned above) corresponds to the capture group reference `1a`.
1838	/// The braced format is also useful for expressing capture group names
1839	/// that use characters not supported by the unbraced format. For example,
1840	/// `${foo[bar].baz}` refers to the capture group named `foo[bar].baz`.
1841	///
1842	/// If a capture group reference is found and it does not refer to a valid
1843	/// capture group, then it will be replaced with the empty string.
1844	///
1845	/// To write a literal `$`, use `$$`.
1846	///
1847	/// # Example
1848	///
1849	/// ```
1850	/// use regex_lite::Regex;
1851	///
1852	/// let re = Regex::new(
1853	/// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
1854	/// ).unwrap();
1855	/// let hay = "On 14-03-2010, I became a Tenneessee lamb.";
1856	/// let caps = re.captures(hay).unwrap();
1857	///
1858	/// let mut dst = String::new();
1859	/// caps.expand("year=$year, month=$month, day=$day", &mut dst);
1860	/// assert_eq!(dst, "year=2010, month=03, day=14");
1861	/// ```
1862	#[inline]
1863	pub fn expand(&self, replacement: &str, dst: &mut String) {
1864	interpolate::string(
1865	replacement,
1866	\|index, dst\| {
1867	let m = match self.get(index) {
1868	None => return,
1869	Some(m) => m,
1870	};
1871	dst.push_str(&self.haystack[m.range()]);
1872	},
1873	\|name\| self.pikevm.nfa().to_index(name),
1874	dst,
1875	);
1876	}
1877
1878	/// Returns an iterator over all capture groups. This includes both
1879	/// matching and non-matching groups.
1880	///
1881	/// The iterator always yields at least one matching group: the first group
1882	/// (at index `0`) with no name. Subsequent groups are returned in the order
1883	/// of their opening parenthesis in the regex.
1884	///
1885	/// The elements yielded have type `Option<Match<'h>>`, where a non-`None`
1886	/// value is present if the capture group matches.
1887	///
1888	/// # Example
1889	///
1890	/// ```
1891	/// use regex_lite::Regex;
1892	///
1893	/// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap();
1894	/// let caps = re.captures("AZ").unwrap();
1895	///
1896	/// let mut it = caps.iter();
/// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("AZ"));
/// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("A"));
/// assert_eq!(it.next().unwrap().map(|m| m.as_str()), None);
/// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("Z"));
1901	/// assert_eq!(it.next(), None);
1902	/// ```
1903	#[inline]
1904	pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> {
1905	SubCaptureMatches {
1906	caps: self,
1907	it: self.pikevm.nfa().capture_names().enumerate(),
1908	}
1909	}
1910
1911	/// Returns the total number of capture groups. This includes both
1912	/// matching and non-matching groups.
1913	///
1914	/// The length returned is always equivalent to the number of elements
1915	/// yielded by [`Captures::iter`]. Consequently, the length is always
1916	/// greater than zero since every `Captures` value always includes the
1917	/// match for the entire regex.
1918	///
1919	/// # Example
1920	///
1921	/// ```
1922	/// use regex_lite::Regex;
1923	///
1924	/// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap();
1925	/// let caps = re.captures("AZ").unwrap();
/// assert_eq!(caps.len(), 4);
1927	/// ```
1928	#[inline]
1929	pub fn len(&self) -> usize {
1930	self.pikevm.nfa().group_len()
1931	}
1932	}
1933
1934	impl<'h> core::fmt::Debug for Captures<'h> {
1935	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1936	/// A little helper type to provide a nice map-like debug
1937	/// representation for our capturing group spans.
1938	///
1939	/// regex-automata has something similar, but it includes the pattern
1940	/// ID in its debug output, which is confusing. It also doesn't include
1941	/// that strings that match because a regex-automata `Captures` doesn't
1942	/// borrow the haystack.
1943	struct CapturesDebugMap<'a> {
1944	caps: &'a Captures<'a>,
1945	}
1946
1947	impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
1948	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1949	let mut map = f.debug_map();
1950	let names = self.caps.pikevm.nfa().capture_names();
1951	for (group_index, maybe_name) in names.enumerate() {
1952	let key = Key(group_index, maybe_name);
1953	match self.caps.get(group_index) {
1954	None => map.entry(&key, &None::<()>),
1955	Some(mat) => map.entry(&key, &Value(mat)),
1956	};
1957	}
1958	map.finish()
1959	}
1960	}
1961
1962	struct Key<'a>(usize, Option<&'a str>);
1963
1964	impl<'a> core::fmt::Debug for Key<'a> {
1965	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1966	write!(f, "{}", self.0)?;
1967	if let Some(name) = self.1 {
1968	write!(f, "/{:?}", name)?;
1969	}
1970	Ok(())
1971	}
1972	}
1973
1974	struct Value<'a>(Match<'a>);
1975
1976	impl<'a> core::fmt::Debug for Value<'a> {
1977	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1978	write!(
1979	f,
1980	"{}..{}/{:?}",
1981	self.0.start(),
1982	self.0.end(),
1983	self.0.as_str()
1984	)
1985	}
1986	}
1987
1988	f.debug_tuple("Captures")
1989	.field(&CapturesDebugMap { caps: self })
1990	.finish()
1991	}
1992	}
1993
1994	/// Get a matching capture group's haystack substring by index.
1995	///
1996	/// The haystack substring returned can't outlive the `Captures` object if this
1997	/// method is used, because of how `Index` is defined (normally `a[i]` is part
1998	/// of `a` and can't outlive it). To work around this limitation, do that, use
1999	/// [`Captures::get`] instead.
2000	///
2001	/// `'h` is the lifetime of the matched haystack, but the lifetime of the
2002	/// `&str` returned by this implementation is the lifetime of the `Captures`
2003	/// value itself.
2004	///
2005	/// # Panics
2006	///
2007	/// If there is no matching group at the given index.
2008	impl<'h> core::ops::Index<usize> for Captures<'h> {
2009	type Output = str;
2010
2011	// The lifetime is written out to make it clear that the &str returned
2012	// does NOT have a lifetime equivalent to 'h.
2013	fn index(&self, i: usize) -> &str {
2014	self.get(i)
2015	.map(\|m: Match<'h>\| m.as_str())
2016	.unwrap_or_else(\|\| panic!("no group at index '{}'", i))
2017	}
2018	}
2019
2020	/// Get a matching capture group's haystack substring by name.
2021	///
2022	/// The haystack substring returned can't outlive the `Captures` object if this
2023	/// method is used, because of how `Index` is defined (normally `a[i]` is part
2024	/// of `a` and can't outlive it). To work around this limitation, do that, use
2025	/// [`Captures::get`] instead.
2026	///
2027	/// `'h` is the lifetime of the matched haystack, but the lifetime of the
2028	/// `&str` returned by this implementation is the lifetime of the `Captures`
2029	/// value itself.
2030	///
2031	/// `'n` is the lifetime of the group name used to index the `Captures` value.
2032	///
2033	/// # Panics
2034	///
2035	/// If there is no matching group at the given name.
2036	impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> {
2037	type Output = str;
2038
2039	fn index<'a>(&'a self, name: &'n str) -> &'a str {
2040	self.name(name)
2041	.map(\|m: Match<'h>\| m.as_str())
2042	.unwrap_or_else(\|\| panic!("no group named '{}'", name))
2043	}
2044	}
2045
/// A low level representation of the byte offsets of each capture group.
///
/// You can think of this as a lower level [`Captures`], where this type does
/// not support named capturing groups directly and it does not borrow the
/// haystack that these offsets were matched on.
///
/// Primarily, this type is useful when using the lower level `Regex` APIs such
/// as [`Regex::captures_read`], which permits amortizing the allocation in
/// which capture match offsets are stored.
///
/// In order to build a value of this type, you'll need to call the
/// [`Regex::capture_locations`] method. The value returned can then be reused
/// in subsequent searches for that regex. Using it for other regexes may
/// result in a panic or otherwise incorrect results.
///
/// # Example
///
/// This example shows how to create and use `CaptureLocations` in a search.
///
/// ```
/// use regex_lite::Regex;
///
/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
/// let mut locs = re.capture_locations();
/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
/// assert_eq!(0..17, m.range());
/// assert_eq!(Some((0, 17)), locs.get(0));
/// assert_eq!(Some((0, 5)), locs.get(1));
/// assert_eq!(Some((6, 17)), locs.get(2));
///
/// // Asking for an invalid capture group always returns None.
/// assert_eq!(None, locs.get(3));
/// # // literals are too big for 32-bit usize: #1041
/// # #[cfg(target_pointer_width = "64")]
/// assert_eq!(None, locs.get(34973498648));
/// # #[cfg(target_pointer_width = "64")]
/// assert_eq!(None, locs.get(9944060567225171988));
/// ```
#[derive(Clone, Debug)]
pub struct CaptureLocations(Vec<Option<NonMaxUsize>>);
2086
2087	impl CaptureLocations {
2088	/// Returns the start and end byte offsets of the capture group at index
2089	/// `i`. This returns `None` if `i` is not a valid capture group or if the
2090	/// capture group did not match.
2091	///
2092	/// # Example
2093	///
2094	/// ```
2095	/// use regex_lite::Regex;
2096	///
2097	/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
2098	/// let mut locs = re.capture_locations();
2099	/// re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
2100	/// assert_eq!(Some((`0`, `17`)), locs.get(`0`));
2101	/// assert_eq!(Some((`0`, `5`)), locs.get(`1`));
2102	/// assert_eq!(Some((`6`, `17`)), locs.get(`2`));
2103	/// ```
2104	#[inline]
2105	pub fn get(&self, i: usize) -> Option<(usize, usize)> {
2106	let slot = i.checked_mul(`2`)?;
2107	let start = self.0.get(slot).copied()??.get();
2108	let slot = slot.checked_add(`1`)?;
2109	let end = self.0.get(slot).copied()??.get();
2110	Some((start, end))
2111	}
2112
2113	/// Returns the total number of capture groups (even if they didn't match).
2114	/// That is, the length returned is unaffected by the result of a search.
2115	///
2116	/// This is always at least `1` since every regex has at least `1`
2117	/// capturing group that corresponds to the entire match.
2118	///
2119	/// # Example
2120	///
2121	/// ```
2122	/// use regex_lite::Regex;
2123	///
2124	/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
2125	/// let mut locs = re.capture_locations();
2126	/// assert_eq!(`3`, locs.len());
2127	/// re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
2128	/// assert_eq!(`3`, locs.len());
2129	/// ```
2130	///
2131	/// Notice that the length is always at least `1`, regardless of the regex:
2132	///
2133	/// ```
2134	/// use regex_lite::Regex;
2135	///
2136	/// let re = Regex::new(r"").unwrap();
2137	/// let locs = re.capture_locations();
2138	/// assert_eq!(`1`, locs.len());
2139	///
2140	/// // [a&&b] is a regex that never matches anything.
2141	/// let re = Regex::new(r"[^\s\S]").unwrap();
2142	/// let locs = re.capture_locations();
2143	/// assert_eq!(`1`, locs.len());
2144	/// ```
2145	#[inline]
2146	pub fn len(&self) -> usize {
2147	// We always have twice as many slots as groups.
2148	self.0.len().checked_shr(`1`).unwrap()
2149	}
2150	}
2151
2152	/// An iterator over all non-overlapping matches in a haystack.
2153	///
2154	/// This iterator yields [`Match`] values. The iterator stops when no more
2155	/// matches can be found.
2156	///
2157	/// `'r` is the lifetime of the compiled regular expression and `'h` is the
2158	/// lifetime of the haystack.
2159	///
2160	/// This iterator is created by [`Regex::find_iter`].
2161	///
2162	/// # Time complexity
2163	///
2164	/// Note that since an iterator runs potentially many searches on the haystack
2165	/// and since each search has worst case `O(m n)` time complexity, the*
2166	/// overall worst case time complexity for iteration is `O(m n^2)`.*
2167	#[derive(Debug)]
2168	pub struct Matches<'r, 'h> {
2169	haystack: &'h str,
2170	it: pikevm::FindMatches<'r, 'h>,
2171	}
2172
2173	impl<'r, 'h> Iterator for Matches<'r, 'h> {
2174	type Item = Match<'h>;
2175
2176	#[inline]
2177	fn next(&mut self) -> Option<Match<'h>> {
2178	self.it.next().map(\|(s: usize, e: usize)\| Match::new(self.haystack, start:s, end:e))
2179	}
2180
2181	#[inline]
2182	fn count(self) -> usize {
2183	self.it.count()
2184	}
2185	}
2186
2187	impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {}
2188
2189	/// An iterator over all non-overlapping capture matches in a haystack.
2190	///
2191	/// This iterator yields [`Captures`] values. The iterator stops when no more
2192	/// matches can be found.
2193	///
2194	/// `'r` is the lifetime of the compiled regular expression and `'h` is the
2195	/// lifetime of the matched string.
2196	///
2197	/// This iterator is created by [`Regex::captures_iter`].
2198	///
2199	/// # Time complexity
2200	///
2201	/// Note that since an iterator runs potentially many searches on the haystack
2202	/// and since each search has worst case `O(m n)` time complexity, the*
2203	/// overall worst case time complexity for iteration is `O(m n^2)`.*
2204	#[derive(Debug)]
2205	pub struct CaptureMatches<'r, 'h> {
2206	haystack: &'h str,
2207	re: &'r Regex,
2208	it: pikevm::CapturesMatches<'r, 'h>,
2209	}
2210
2211	impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> {
2212	type Item = Captures<'h>;
2213
2214	#[inline]
2215	fn next(&mut self) -> Option<Captures<'h>> {
2216	self.it.next().map(\|slots: Vec>\| Captures {
2217	haystack: self.haystack,
2218	slots: CaptureLocations(slots),
2219	pikevm: Arc::clone(&self.re.pikevm),
2220	})
2221	}
2222
2223	#[inline]
2224	fn count(self) -> usize {
2225	self.it.count()
2226	}
2227	}
2228
2229	impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {}
2230
2231	/// An iterator over all substrings delimited by a regex match.
2232	///
2233	/// `'r` is the lifetime of the compiled regular expression and `'h` is the
2234	/// lifetime of the byte string being split.
2235	///
2236	/// This iterator is created by [`Regex::split`].
2237	///
2238	/// # Time complexity
2239	///
2240	/// Note that since an iterator runs potentially many searches on the haystack
2241	/// and since each search has worst case `O(m n)` time complexity, the*
2242	/// overall worst case time complexity for iteration is `O(m n^2)`.*
2243	#[derive(Debug)]
2244	pub struct Split<'r, 'h> {
2245	haystack: &'h str,
2246	finder: Matches<'r, 'h>,
2247	last: usize,
2248	}
2249
2250	impl<'r, 'h> Iterator for Split<'r, 'h> {
2251	type Item = &'h str;
2252
2253	#[inline]
2254	fn next(&mut self) -> Option<&'h str> {
2255	match self.finder.next() {
2256	None => {
2257	let len: usize = self.haystack.len();
2258	if self.last > len {
2259	None
2260	} else {
2261	let range: Range = self.last..len;
2262	self.last = len + `1`; // Next call will return None
2263	Some(&self.haystack[range])
2264	}
2265	}
2266	Some(m: Match<'h>) => {
2267	let range: Range = self.last..m.start();
2268	self.last = m.end();
2269	Some(&self.haystack[range])
2270	}
2271	}
2272	}
2273	}
2274
2275	impl<'r, 't> core::iter::FusedIterator for Split<'r, 't> {}
2276
2277	/// An iterator over at most `N` substrings delimited by a regex match.
2278	///
2279	/// The last substring yielded by this iterator will be whatever remains after
2280	/// `N-1` splits.
2281	///
2282	/// `'r` is the lifetime of the compiled regular expression and `'h` is the
2283	/// lifetime of the byte string being split.
2284	///
2285	/// This iterator is created by [`Regex::splitn`].
2286	///
2287	/// # Time complexity
2288	///
2289	/// Note that since an iterator runs potentially many searches on the haystack
2290	/// and since each search has worst case `O(m n)` time complexity, the*
2291	/// overall worst case time complexity for iteration is `O(m n^2)`.*
2292	///
2293	/// Although note that the worst case time here has an upper bound given
2294	/// by the `limit` parameter to [`Regex::splitn`].
2295	#[derive(Debug)]
2296	pub struct SplitN<'r, 'h> {
2297	splits: Split<'r, 'h>,
2298	limit: usize,
2299	}
2300
2301	impl<'r, 'h> Iterator for SplitN<'r, 'h> {
2302	type Item = &'h str;
2303
2304	#[inline]
2305	fn next(&mut self) -> Option<&'h str> {
2306	if self.limit == `0` {
2307	return None;
2308	}
2309
2310	self.limit -= `1`;
2311	if self.limit > `0` {
2312	return self.splits.next();
2313	}
2314
2315	let len = self.splits.haystack.len();
2316	if self.splits.last > len {
2317	// We've already returned all substrings.
2318	None
2319	} else {
2320	// self.n == 0, so future calls will return None immediately
2321	Some(&self.splits.haystack[self.splits.last..len])
2322	}
2323	}
2324
2325	#[inline]
2326	fn size_hint(&self) -> (usize, Option<usize>) {
2327	self.splits.size_hint()
2328	}
2329	}
2330
2331	impl<'r, 't> core::iter::FusedIterator for SplitN<'r, 't> {}
2332
2333	/// An iterator over the names of all capture groups in a regex.
2334	///
2335	/// This iterator yields values of type `Option<&str>` in order of the opening
2336	/// capture group parenthesis in the regex pattern. `None` is yielded for
2337	/// groups with no name. The first element always corresponds to the implicit
2338	/// and unnamed group for the overall match.
2339	///
2340	/// `'r` is the lifetime of the compiled regular expression.
2341	///
2342	/// This iterator is created by [`Regex::capture_names`].
2343	#[derive(Clone, Debug)]
2344	pub struct CaptureNames<'r>(nfa::CaptureNames<'r>);
2345
2346	impl<'r> Iterator for CaptureNames<'r> {
2347	type Item = Option<&'r str>;
2348
2349	#[inline]
2350	fn next(&mut self) -> Option<Option<&'r str>> {
2351	self.0.next()
2352	}
2353
2354	#[inline]
2355	fn size_hint(&self) -> (usize, Option<usize>) {
2356	self.0.size_hint()
2357	}
2358
2359	#[inline]
2360	fn count(self) -> usize {
2361	self.0.count()
2362	}
2363	}
2364
2365	impl<'r> ExactSizeIterator for CaptureNames<'r> {}
2366
2367	impl<'r> core::iter::FusedIterator for CaptureNames<'r> {}
2368
2369	/// An iterator over all group matches in a [`Captures`] value.
2370	///
2371	/// This iterator yields values of type `Option<Match<'h>>`, where `'h` is the
2372	/// lifetime of the haystack that the matches are for. The order of elements
2373	/// yielded corresponds to the order of the opening parenthesis for the group
2374	/// in the regex pattern. `None` is yielded for groups that did not participate
2375	/// in the match.
2376	///
2377	/// The first element always corresponds to the implicit group for the overall
2378	/// match. Since this iterator is created by a [`Captures`] value, and a
2379	/// `Captures` value is only created when a match occurs, it follows that the
2380	/// first element yielded by this iterator is guaranteed to be non-`None`.
2381	///
2382	/// The lifetime `'c` corresponds to the lifetime of the `Captures` value that
2383	/// created this iterator, and the lifetime `'h` corresponds to the originally
2384	/// matched haystack.
2385	#[derive(Clone, Debug)]
2386	pub struct SubCaptureMatches<'c, 'h> {
2387	caps: &'c Captures<'h>,
2388	it: core::iter::Enumerate<nfa::CaptureNames<'c>>,
2389	}
2390
2391	impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> {
2392	type Item = Option<Match<'h>>;
2393
2394	#[inline]
2395	fn next(&mut self) -> Option<Option<Match<'h>>> {
2396	let (group_index: usize, _) = self.it.next()?;
2397	Some(self.caps.get(group_index))
2398	}
2399
2400	#[inline]
2401	fn size_hint(&self) -> (usize, Option<usize>) {
2402	self.it.size_hint()
2403	}
2404
2405	#[inline]
2406	fn count(self) -> usize {
2407	self.it.count()
2408	}
2409	}
2410
2411	impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {}
2412
2413	impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {}
2414
2415	/// A trait for types that can be used to replace matches in a haystack.
2416	///
2417	/// In general, users of this crate shouldn't need to implement this trait,
2418	/// since implementations are already provided for `&str` along with other
2419	/// variants of string types, as well as `FnMut(&Captures) -> String` (or any
2420	/// `FnMut(&Captures) -> T` where `T: AsRef<str>`). Those cover most use cases,
2421	/// but callers can implement this trait directly if necessary.
2422	///
2423	/// # Example
2424	///
2425	/// This example shows a basic implementation of the `Replacer` trait. This
2426	/// can be done much more simply using the replacement string interpolation
2427	/// support (e.g., `$first $last`), but this approach avoids needing to parse
2428	/// the replacement string at all.
2429	///
2430	/// ```
2431	/// use regex_lite::{Captures, Regex, Replacer};
2432	///
2433	/// struct NameSwapper;
2434	///
2435	/// impl Replacer for NameSwapper {
2436	/// fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2437	/// dst.push_str(&caps["first"]);
2438	/// dst.push_str(" ");
2439	/// dst.push_str(&caps["last"]);
2440	/// }
2441	/// }
2442	///
2443	/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap();
2444	/// let result = re.replace("Springsteen, Bruce", NameSwapper);
2445	/// assert_eq!(result, "Bruce Springsteen");
2446	/// ```
pub trait Replacer {
    /// Appends possibly empty data to `dst` to replace the current match.
    ///
    /// The current match is represented by `caps`, which is guaranteed to
    /// have a match at capture group `0`.
    ///
    /// For example, a no-op replacement would be `dst.push_str(&caps[0])`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String);

    /// Return a fixed unchanging replacement string.
    ///
    /// When doing replacements, if access to [`Captures`] is not needed (e.g.,
    /// the replacement string does not need `$` expansion), then it can be
    /// beneficial to avoid finding sub-captures.
    ///
    /// In general, this is called once for every call to a replacement routine
    /// such as [`Regex::replace_all`].
    ///
    /// The default implementation returns `None`.
    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
        None
    }

    /// Returns a type that implements `Replacer`, but that borrows and wraps
    /// this `Replacer`.
    ///
    /// This is useful when you want to take a generic `Replacer` (which might
    /// not be cloneable) and use it without consuming it, so it can be used
    /// more than once.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_lite::{Regex, Replacer};
    ///
    /// fn replace_all_twice<R: Replacer>(
    ///     re: Regex,
    ///     src: &str,
    ///     mut rep: R,
    /// ) -> String {
    ///     let dst = re.replace_all(src, rep.by_ref());
    ///     let dst = re.replace_all(&dst, rep.by_ref());
    ///     dst.into_owned()
    /// }
    /// ```
    fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
        ReplacerRef(self)
    }
}
2494
2495	impl<'a> Replacer for &'a str {
2496	fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2497	caps.expand(*self, dst);
2498	}
2499
2500	fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2501	no_expansion(self)
2502	}
2503	}
2504
2505	impl<'a> Replacer for &'a String {
2506	fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2507	self.as_str().replace_append(caps, dst)
2508	}
2509
2510	fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2511	no_expansion(self)
2512	}
2513	}
2514
2515	impl Replacer for String {
2516	fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2517	self.as_str().replace_append(caps, dst)
2518	}
2519
2520	fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2521	no_expansion(self)
2522	}
2523	}
2524
2525	impl<'a> Replacer for Cow<'a, str> {
2526	fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2527	self.as_ref().replace_append(caps, dst)
2528	}
2529
2530	fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2531	no_expansion(self)
2532	}
2533	}
2534
2535	impl<'a> Replacer for &'a Cow<'a, str> {
2536	fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2537	self.as_ref().replace_append(caps, dst)
2538	}
2539
2540	fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2541	no_expansion(self)
2542	}
2543	}
2544
2545	impl<F, T> Replacer for F
2546	where
2547	F: FnMut(&Captures<'_>) -> T,
2548	T: AsRef<str>,
2549	{
2550	fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2551	dst.push_str((*self)(caps).as_ref());
2552	}
2553	}
2554
2555	/// A by-reference adaptor for a [`Replacer`].
2556	///
2557	/// This permits reusing the same `Replacer` value in multiple calls to a
2558	/// replacement routine like [`Regex::replace_all`].
2559	///
2560	/// This type is created by [`Replacer::by_ref`].
2561	#[derive(Debug)]
2562	pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);
2563
2564	impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
2565	fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2566	self.0.replace_append(caps, dst)
2567	}
2568
2569	fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2570	self.0.no_expansion()
2571	}
2572	}
2573
2574	/// A helper type for forcing literal string replacement.
2575	///
2576	/// It can be used with routines like [`Regex::replace`] and
2577	/// [`Regex::replace_all`] to do a literal string replacement without expanding
2578	/// `$name` to their corresponding capture groups. This can be both convenient
2579	/// (to avoid escaping `$`, for example) and faster (since capture groups
2580	/// don't need to be found).
2581	///
2582	/// `'s` is the lifetime of the literal string to use.
2583	///
2584	/// # Example
2585	///
2586	/// ```
2587	/// use regex_lite::{NoExpand, Regex};
2588	///
2589	/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap();
2590	/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
2591	/// assert_eq!(result, "$2 $last");
2592	/// ```
2593	#[derive(Clone, Debug)]
2594	pub struct NoExpand<'t>(pub &'t str);
2595
2596	impl<'t> Replacer for NoExpand<'t> {
2597	fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) {
2598	dst.push_str(self.0);
2599	}
2600
2601	fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2602	Some(Cow::Borrowed(self.0))
2603	}
2604	}
2605
/// Cheap pre-check for whether a replacement string needs interpolation.
///
/// Returns `None` if a `$` occurs anywhere in the given string, which
/// suggests interpolation needs to be done. If there is no `$` at all, then
/// interpolation definitely does not need to be done, and the given string is
/// returned as a borrowed `Cow`.
///
/// This is meant to be used to implement the `Replacer::no_expansion` method
/// in its various trait impls.
fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> {
    let s = t.as_ref();
    if s.contains('$') {
        None
    } else {
        Some(Cow::Borrowed(s))
    }
}
2621
/// A configurable builder for a [`Regex`].
///
/// This builder can be used to programmatically set flags such as `i` (case
/// insensitive) and `x` (for verbose mode). This builder can also be used to
/// configure things like a size limit on the compiled regular expression.
#[derive(Debug)]
pub struct RegexBuilder {
    // Owned copy of the pattern string given to `RegexBuilder::new`.
    pattern: String,
    // Configuration handed to `Hir::parse` when the regex is built.
    hir_config: hir::Config,
    // Configuration handed to `NFA::new` when the regex is built.
    nfa_config: nfa::Config,
}
2633
2634	impl RegexBuilder {
2635	/// Create a new builder with a default configuration for the given
2636	/// pattern.
2637	///
2638	/// If the pattern is invalid or exceeds the configured size limits, then
2639	/// an error will be returned when [`RegexBuilder::build`] is called.
2640	pub fn new(pattern: &str) -> RegexBuilder {
2641	RegexBuilder {
2642	pattern: pattern.to_string(),
2643	hir_config: hir::Config::default(),
2644	nfa_config: nfa::Config::default(),
2645	}
2646	}
2647
2648	/// Compiles the pattern given to `RegexBuilder::new` with the
2649	/// configuration set on this builder.
2650	///
2651	/// If the pattern isn't a valid regex or if a configured size limit was
2652	/// exceeded, then an error is returned.
2653	pub fn build(&self) -> Result<Regex, Error> {
2654	let hir = Hir::parse(self.hir_config, &self.pattern)?;
2655	let nfa = NFA::new(self.nfa_config, self.pattern.clone(), &hir)?;
2656	let pikevm = Arc::new(PikeVM::new(nfa));
2657	let pool = {
2658	let pikevm = Arc::clone(&pikevm);
2659	let create = Box::new(move \|\| Cache::new(&pikevm));
2660	CachePool::new(create)
2661	};
2662	Ok(Regex { pikevm, pool })
2663	}
2664
2665	/// This configures whether to enable ASCII case insensitive matching for
2666	/// the entire pattern.
2667	///
2668	/// This setting can also be configured using the inline flag `i`
2669	/// in the pattern. For example, `(?i:foo)` matches `foo` case
2670	/// insensitively while `(?-i:foo)` matches `foo` case sensitively.
2671	///
2672	/// The default for this is `false`.
2673	///
2674	/// # Example
2675	///
2676	/// ```
2677	/// use regex_lite::RegexBuilder;
2678	///
2679	/// let re = RegexBuilder::new(r"foo(?-i:bar)quux")
2680	/// .case_insensitive(`true`)
2681	/// .build()
2682	/// .unwrap();
2683	/// assert!(re.is_match("FoObarQuUx"));
2684	/// // Even though case insensitive matching is enabled in the builder,
2685	/// // it can be locally disabled within the pattern. In this case,
2686	/// // `bar` is matched case sensitively.
2687	/// assert!(!re.is_match("fooBARquux"));
2688	/// ```
2689	pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {
2690	self.hir_config.flags.case_insensitive = yes;
2691	self
2692	}
2693
2694	/// This configures multi-line mode for the entire pattern.
2695	///
2696	/// Enabling multi-line mode changes the behavior of the `^` and `$` anchor
2697	/// assertions. Instead of only matching at the beginning and end of a
2698	/// haystack, respectively, multi-line mode causes them to match at the
2699	/// beginning and end of a line in addition* to the beginning and end of*
2700	/// a haystack. More precisely, `^` will match at the position immediately
2701	/// following a `\n` and `$` will match at the position immediately
2702	/// preceding a `\n`.
2703	///
2704	/// The behavior of this option is impacted by the [`RegexBuilder::crlf`]
2705	/// setting. Namely, CRLF mode changes the line terminator to be either
2706	/// `\r` or `\n`, but never at the position between a `\r` and `\`n.
2707	///
2708	/// This setting can also be configured using the inline flag `m` in the
2709	/// pattern.
2710	///
2711	/// The default for this is `false`.
2712	///
2713	/// # Example
2714	///
2715	/// ```
2716	/// use regex_lite::RegexBuilder;
2717	///
2718	/// let re = RegexBuilder::new(r"^foo$")
2719	/// .multi_line(`true`)
2720	/// .build()
2721	/// .unwrap();
2722	/// assert_eq!(Some(`1`..`4`), re.find("`\n`foo`\n`").map(\|m\| m.range()));
2723	/// ```
2724	pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
2725	self.hir_config.flags.multi_line = yes;
2726	self
2727	}
2728
2729	/// This configures dot-matches-new-line mode for the entire pattern.
2730	///
2731	/// Perhaps surprisingly, the default behavior for `.` is not to match
2732	/// any character, but rather, to match any character except for the line
2733	/// terminator (which is `\n` by default). When this mode is enabled, the
2734	/// behavior changes such that `.` truly matches any character.
2735	///
2736	/// This setting can also be configured using the inline flag `s` in the
2737	/// pattern.
2738	///
2739	/// The default for this is `false`.
2740	///
2741	/// # Example
2742	///
2743	/// ```
2744	/// use regex_lite::RegexBuilder;
2745	///
2746	/// let re = RegexBuilder::new(r"foo.bar")
2747	/// .dot_matches_new_line(`true`)
2748	/// .build()
2749	/// .unwrap();
2750	/// let hay = "foo`\n`bar";
2751	/// assert_eq!(Some("foo`\n`bar"), re.find(hay).map(\|m\| m.as_str()));
2752	/// ```
2753	pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut RegexBuilder {
2754	self.hir_config.flags.dot_matches_new_line = yes;
2755	self
2756	}
2757
2758	/// This configures CRLF mode for the entire pattern.
2759	///
2760	/// When CRLF mode is enabled, both `\r` ("carriage return" or CR for
2761	/// short) and `\n` ("line feed" or LF for short) are treated as line
2762	/// terminators. This results in the following:
2763	///
2764	/// Unless dot-matches-new-line mode is enabled, `.` will now match any*
2765	/// character except for `\n` and `\r`.
2766	/// When multi-line mode is enabled, `^` will match immediately*
2767	/// following a `\n` or a `\r`. Similarly, `$` will match immediately
2768	/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match between
2769	/// `\r` and `\n`.
2770	///
2771	/// This setting can also be configured using the inline flag `R` in
2772	/// the pattern.
2773	///
2774	/// The default for this is `false`.
2775	///
2776	/// # Example
2777	///
2778	/// ```
2779	/// use regex_lite::RegexBuilder;
2780	///
2781	/// let re = RegexBuilder::new(r"^foo$")
2782	/// .multi_line(`true`)
2783	/// .crlf(`true`)
2784	/// .build()
2785	/// .unwrap();
2786	/// let hay = "`\r\n`foo`\r\n`";
2787	/// // If CRLF mode weren't enabled here, then '$' wouldn't match
2788	/// // immediately after 'foo', and thus no match would be found.
2789	/// assert_eq!(Some("foo"), re.find(hay).map(\|m\| m.as_str()));
2790	/// ```
2791	///
2792	/// This example demonstrates that `^` will never match at a position
2793	/// between `\r` and `\n`. (`$` will similarly not match between a `\r`
2794	/// and a `\n`.)
2795	///
2796	/// ```
2797	/// use regex_lite::RegexBuilder;
2798	///
2799	/// let re = RegexBuilder::new(r"^")
2800	/// .multi_line(`true`)
2801	/// .crlf(`true`)
2802	/// .build()
2803	/// .unwrap();
2804	/// let hay = "`\r\n\r\n`";
2805	/// let ranges: Vec<_> = re.find_iter(hay).map(\|m\| m.range()).collect();
2806	/// assert_eq!(ranges, vec![`0`..`0`, `2`..`2`, `4`..`4`]);
2807	/// ```
2808	pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder {
2809	self.hir_config.flags.crlf = yes;
2810	self
2811	}
2812
2813	/// This configures swap-greed mode for the entire pattern.
2814	///
2815	/// When swap-greed mode is enabled, patterns like `a+` will become
2816	/// non-greedy and patterns like `a+?` will become greedy. In other words,
2817	/// the meanings of `a+` and `a+?` are switched.
2818	///
2819	/// This setting can also be configured using the inline flag `U` in the
2820	/// pattern.
2821	///
2822	/// The default for this is `false`.
2823	///
2824	/// # Example
2825	///
2826	/// ```
2827	/// use regex_lite::RegexBuilder;
2828	///
2829	/// let re = RegexBuilder::new(r"a+")
2830	/// .swap_greed(`true`)
2831	/// .build()
2832	/// .unwrap();
2833	/// assert_eq!(Some("a"), re.find("aaa").map(\|m\| m.as_str()));
2834	/// ```
2835	pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
2836	self.hir_config.flags.swap_greed = yes;
2837	self
2838	}
2839
2840	/// This configures verbose mode for the entire pattern.
2841	///
2842	/// When enabled, whitespace will treated as insignifcant in the pattern
2843	/// and `#` can be used to start a comment until the next new line.
2844	///
2845	/// Normally, in most places in a pattern, whitespace is treated literally.
2846	/// For example ` +` will match one or more ASCII whitespace characters.
2847	///
2848	/// When verbose mode is enabled, `\#` can be used to match a literal `#`
2849	/// and `\ ` can be used to match a literal ASCII whitespace character.
2850	///
2851	/// Verbose mode is useful for permitting regexes to be formatted and
2852	/// broken up more nicely. This may make them more easily readable.
2853	///
2854	/// This setting can also be configured using the inline flag `x` in the
2855	/// pattern.
2856	///
2857	/// The default for this is `false`.
2858	///
2859	/// # Example
2860	///
2861	/// ```
2862	/// use regex_lite::RegexBuilder;
2863	///
2864	/// let pat = r"
2865	/// \b
2866	/// (?<first>[A-Z]\w*) # always start with uppercase letter
2867	/// \s+ # whitespace should separate names
2868	/// (?: # middle name can be an initial!
2869	/// (?:(?<initial>[A-Z])\.\|(?<middle>[A-Z]\w*))
2870	/// \s+
2871	/// )?
2872	/// (?<last>[A-Z]\w*)
2873	/// \b
2874	/// ";
2875	/// let re = RegexBuilder::new(pat)
2876	/// .ignore_whitespace(`true`)
2877	/// .build()
2878	/// .unwrap();
2879	///
2880	/// let caps = re.captures("Harry Potter").unwrap();
2881	/// assert_eq!("Harry", &caps["first"]);
2882	/// assert_eq!("Potter", &caps["last"]);
2883	///
2884	/// let caps = re.captures("Harry J. Potter").unwrap();
2885	/// assert_eq!("Harry", &caps["first"]);
2886	/// // Since a middle name/initial isn't required for an overall match,
2887	/// // we can't assume that 'initial' or 'middle' will be populated!
2888	/// assert_eq!(Some("J"), caps.name("initial").map(\|m\| m.as_str()));
2889	/// assert_eq!(None, caps.name("middle").map(\|m\| m.as_str()));
2890	/// assert_eq!("Potter", &caps["last"]);
2891	///
2892	/// let caps = re.captures("Harry James Potter").unwrap();
2893	/// assert_eq!("Harry", &caps["first"]);
2894	/// // Since a middle name/initial isn't required for an overall match,
2895	/// // we can't assume that 'initial' or 'middle' will be populated!
2896	/// assert_eq!(None, caps.name("initial").map(\|m\| m.as_str()));
2897	/// assert_eq!(Some("James"), caps.name("middle").map(\|m\| m.as_str()));
2898	/// assert_eq!("Potter", &caps["last"]);
2899	/// ```
2900	pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {
2901	self.hir_config.flags.ignore_whitespace = yes;
2902	self
2903	}
2904
2905	/// Sets the approximate size limit, in bytes, of the compiled regex.
2906	///
2907	/// This roughly corresponds to the number of heap memory, in bytes,
2908	/// occupied by a single regex. If the regex would otherwise approximately
2909	/// exceed this limit, then compiling that regex will fail.
2910	///
2911	/// The main utility of a method like this is to avoid compiling regexes
2912	/// that use an unexpected amount of resources, such as time and memory.
2913	/// Even if the memory usage of a large regex is acceptable, its search
2914	/// time may not be. Namely, worst case time complexity for search is `O(m
2915	/// n)`, where `m ~ len(pattern)` and `n ~ len(haystack)`. That is,*
2916	/// search time depends, in part, on the size of the compiled regex. This
2917	/// means that putting a limit on the size of the regex limits how much a
2918	/// regex can impact search time.
2919	///
2920	/// The default for this is some reasonable number that permits most
2921	/// patterns to compile successfully.
2922	///
2923	/// # Example
2924	///
2925	/// ```
2926	/// use regex_lite::RegexBuilder;
2927	///
2928	/// assert!(RegexBuilder::new(r"\w").size_limit(`100`).build().is_err());
2929	/// ```
2930	pub fn size_limit(&mut self, limit: usize) -> &mut RegexBuilder {
2931	self.nfa_config.size_limit = Some(limit);
2932	self
2933	}
2934
2935	/// Set the nesting limit for this parser.
2936	///
2937	/// The nesting limit controls how deep the abstract syntax tree is allowed
2938	/// to be. If the AST exceeds the given limit (e.g., with too many nested
2939	/// groups), then an error is returned by the parser.
2940	///
2941	/// The purpose of this limit is to act as a heuristic to prevent stack
2942	/// overflow for consumers that do structural induction on an AST using
2943	/// explicit recursion. While this crate never does this (instead using
2944	/// constant stack space and moving the call stack to the heap), other
2945	/// crates may.
2946	///
2947	/// This limit is not checked until the entire AST is parsed. Therefore, if
2948	/// callers want to put a limit on the amount of heap space used, then they
2949	/// should impose a limit on the length, in bytes, of the concrete pattern
2950	/// string. In particular, this is viable since this parser implementation
2951	/// will limit itself to heap space proportional to the length of the
2952	/// pattern string. See also the [untrusted inputs](crate#untrusted-input)
2953	/// section in the top-level crate documentation for more information about
2954	/// this.
2955	///
2956	/// Note that a nest limit of `0` will return a nest limit error for most
2957	/// patterns but not all. For example, a nest limit of `0` permits `a` but
2958	/// not `ab`, since `ab` requires an explicit concatenation, which results
2959	/// in a nest depth of `1`. In general, a nest limit is not something that
2960	/// manifests in an obvious way in the concrete syntax, therefore, it
2961	/// should not be used in a granular way.
2962	///
2963	/// # Example
2964	///
2965	/// ```
2966	/// use regex_lite::RegexBuilder;
2967	///
2968	/// assert!(RegexBuilder::new(r"").nest_limit(`0`).build().is_ok());
2969	/// assert!(RegexBuilder::new(r"a").nest_limit(`0`).build().is_ok());
2970	/// assert!(RegexBuilder::new(r"(a)").nest_limit(`0`).build().is_err());
2971	/// ```
2972	pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
2973	self.hir_config.nest_limit = limit;
2974	self
2975	}
2976	}
2977