nfa.rs source code [crates/regex_lite/src/nfa.rs]

1	use core::{cell::RefCell, mem::size_of};
2
3	use alloc::{string::String, sync::Arc, vec, vec::Vec};
4
5	use crate::{
6	error::Error,
7	hir::{self, Hir, HirKind},
8	int::U32,
9	};
10
11	pub(crate) type StateID = u32;
12
13	#[derive(Clone, Copy, Debug)]
14	pub(crate) struct Config {
15	pub(crate) size_limit: Option<usize>,
16	}
17
18	impl Default for Config {
19	fn default() -> Config {
20	Config { size_limit: Some(`10` * (`1` << `20`)) }
21	}
22	}
23
24	#[derive(Clone)]
25	pub(crate) struct NFA {
26	/// The pattern string this NFA was generated from.
27	///
28	/// We put it here for lack of a better place to put it. ¯\_(ツ)_/¯
29	pattern: String,
30	/// The states that make up this NFA.
31	states: Vec<State>,
32	/// The ID of the start state.
33	start: StateID,
34	/// Whether this NFA can only match at the beginning of a haystack.
35	is_start_anchored: bool,
36	/// Whether this NFA can match the empty string.
37	is_match_empty: bool,
38	/// If every match has the same number of matching capture groups, then
39	/// this corresponds to the number of groups.
40	static_explicit_captures_len: Option<usize>,
41	/// A map from capture group name to its corresponding index.
42	cap_name_to_index: CaptureNameMap,
43	/// A map from capture group index to the corresponding name, if one
44	/// exists.
45	cap_index_to_name: Vec<Option<Arc<str>>>,
46	/// Heap memory used indirectly by NFA states and other things (like the
47	/// various capturing group representations above). Since each state
48	/// might use a different amount of heap, we need to keep track of this
49	/// incrementally.
50	memory_extra: usize,
51	}
52
53	impl NFA {
54	/// Creates a new NFA from the given configuration and HIR.
55	pub(crate) fn new(
56	config: Config,
57	pattern: String,
58	hir: &Hir,
59	) -> Result<NFA, Error> {
60	Compiler::new(config, pattern).compile(hir)
61	}
62
63	/// Returns the pattern string used to construct this NFA.
64	pub(crate) fn pattern(&self) -> &str {
65	&self.pattern
66	}
67
68	/// Returns the state corresponding to the given ID.
69	///
70	/// # Panics
71	///
72	/// If the ID does not refer to a valid state, then this panics.
73	pub(crate) fn state(&self, id: StateID) -> &State {
74	&self.states[id.as_usize()]
75	}
76
77	/// Returns the total number of states in this NFA.
78	pub(crate) fn len(&self) -> usize {
79	self.states.len()
80	}
81
82	/// Returns the ID of the starting state for this NFA.
83	pub(crate) fn start(&self) -> StateID {
84	self.start
85	}
86
87	/// Returns the capture group index for the corresponding named group.
88	/// If no such group with the given name exists, then `None` is returned.
89	pub(crate) fn to_index(&self, name: &str) -> Option<usize> {
90	self.cap_name_to_index.get(name).cloned().map(\|i\| i.as_usize())
91	}
92
93	/*
94	/// Returns the capture group name for the corresponding index.
95	/// If no such group with the given index, then `None` is returned.
96	pub(crate) fn to_name(&self, index: usize) -> Option<&str> {
97	self.cap_index_to_name.get(index)?.as_deref()
98	}
99	*/
100
101	/// Returns an iterator over all of the capture groups, along with their
102	/// names if they exist, in this NFA.
103	pub(crate) fn capture_names(&self) -> CaptureNames<'_> {
104	CaptureNames { it: self.cap_index_to_name.iter() }
105	}
106
107	/// Returns the total number of capture groups, including the first and
108	/// implicit group, in this NFA.
109	pub(crate) fn group_len(&self) -> usize {
110	self.cap_index_to_name.len()
111	}
112
113	/// Returns true if and only if this NFA can only match at the beginning of
114	/// a haystack.
115	pub(crate) fn is_start_anchored(&self) -> bool {
116	self.is_start_anchored
117	}
118
119	/// If the pattern always reports the same number of matching capture groups
120	/// for every match, then this returns the number of those groups. This
121	/// doesn't include the implicit group found in every pattern.
122	pub(crate) fn static_explicit_captures_len(&self) -> Option<usize> {
123	self.static_explicit_captures_len
124	}
125
126	/// Returns the heap memory usage, in bytes, used by this NFA.
127	fn memory_usage(&self) -> usize {
128	(self.states.len() * size_of::<State>())
129	+ (self.cap_index_to_name.len() * size_of::<Option<Arc<str>>>())
130	+ self.memory_extra
131	}
132	}
133
134	impl core::fmt::Debug for NFA {
135	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
136	writeln!(f, "NFA(")?;
137	writeln!(f, "pattern: {}", self.pattern)?;
138	for (sid: usize, state: &State) in self.states.iter().enumerate() {
139	writeln!(f, "{:`07`?}: {:?}", sid, state)?;
140	}
141	writeln!(f, ")")?;
142	Ok(())
143	}
144	}
145
146	/// An iterator over all capture groups in an NFA.
147	///
148	/// If a particular group has a name, then it is yielded. Otherwise, `None`
149	/// is yielded.
150	#[derive(Clone, Debug)]
151	pub(crate) struct CaptureNames<'a> {
152	it: core::slice::Iter<'a, Option<Arc<str>>>,
153	}
154
155	impl<'a> Iterator for CaptureNames<'a> {
156	type Item = Option<&'a str>;
157
158	fn next(&mut self) -> Option<Option<&'a str>> {
159	self.it.next().map(\|n: &'a Option>\| n.as_deref())
160	}
161	}
162
163	#[derive(Clone, Eq, PartialEq)]
164	pub(crate) enum State {
165	Char { target: StateID, ch: char },
166	Ranges { target: StateID, ranges: Vec<(char, char)> },
167	Splits { targets: Vec<StateID>, reverse: bool },
168	Goto { target: StateID, look: Option<hir::Look> },
169	Capture { target: StateID, slot: u32 },
170	Fail,
171	Match,
172	}
173
174	impl State {
175	/// Returns the heap memory usage of this NFA state in bytes.
176	fn memory_usage(&self) -> usize {
177	match *self {
178	State::Char { .. }
179	\| State::Goto { .. }
180	\| State::Capture { .. }
181	\| State::Fail { .. }
182	\| State::Match => `0`,
183	State::Splits { ref targets, .. } => {
184	targets.len() * size_of::<StateID>()
185	}
186	State::Ranges { ref ranges, .. } => {
187	ranges.len() * size_of::<(char, char)>()
188	}
189	}
190	}
191
192	/// Returns an iterator over the given split targets. The order of the
193	/// iterator yields elements in reverse when `reverse` is true.
194	pub(crate) fn iter_splits<'a>(
195	splits: &'a [StateID],
196	reverse: bool,
197	) -> impl Iterator<Item = StateID> + 'a {
198	let mut it = splits.iter();
199	core::iter::from_fn(move \|\| {
200	if reverse { it.next_back() } else { it.next() }.copied()
201	})
202	}
203	}
204
205	impl core::fmt::Debug for State {
206	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
207	match *self {
208	State::Char { target, ch } => {
209	write!(f, "{:?} => {:?}", ch, target)
210	}
211	State::Ranges { target, ref ranges } => {
212	for (i, &(start, end)) in ranges.iter().enumerate() {
213	if i > `0` {
214	write!(f, ", ")?;
215	}
216	write!(f, "{:?}-{:?} => {:?}", start, end, target)?;
217	}
218	Ok(())
219	}
220	State::Splits { ref targets, reverse } => {
221	write!(f, "splits(")?;
222	for (i, sid) in
223	State::iter_splits(targets, reverse).enumerate()
224	{
225	if i > `0` {
226	write!(f, ", ")?;
227	}
228	write!(f, "{:?}", sid)?;
229	}
230	write!(f, ")")
231	}
232	State::Goto { target, look: None } => {
233	write!(f, "goto({:?})", target)
234	}
235	State::Goto { target, look: Some(look) } => {
236	write!(f, "{:?} => {:?}", look, target)
237	}
238	State::Capture { target, slot } => {
239	write!(f, "capture(slot={:?}) => {:?}", slot, target,)
240	}
241	State::Fail => write!(f, "FAIL"),
242	State::Match => {
243	write!(f, "MATCH")
244	}
245	}
246	}
247	}
248
249	/// A map from capture group name to its corresponding capture group index.
250	///
251	/// We define a type alias here so that we can transparently use a `HashMap`
252	/// whenever it's available. We do so presumably because it's faster, although
253	/// there are no benchmarks verifying this.
254	#[cfg(feature = "std")]
255	type CaptureNameMap = std::collections::HashMap<Arc<str>, u32>;
256	#[cfg(not(feature = "std"))]
257	type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, u32>;
258
259	#[derive(Debug)]
260	struct Compiler {
261	config: Config,
262	nfa: RefCell<NFA>,
263	}
264
265	impl Compiler {
266	fn new(config: Config, pattern: String) -> Compiler {
267	let nfa = RefCell::new(NFA {
268	pattern,
269	states: vec![],
270	start: `0`,
271	is_start_anchored: `false`,
272	is_match_empty: `false`,
273	static_explicit_captures_len: None,
274	cap_name_to_index: CaptureNameMap::default(),
275	cap_index_to_name: vec![],
276	memory_extra: `0`,
277	});
278	Compiler { config, nfa }
279	}
280
281	fn compile(self, hir: &Hir) -> Result<NFA, Error> {
282	self.nfa.borrow_mut().is_start_anchored = hir.is_start_anchored();
283	self.nfa.borrow_mut().is_match_empty = hir.is_match_empty();
284	self.nfa.borrow_mut().static_explicit_captures_len =
285	hir.static_explicit_captures_len();
286	let compiled = self.c_capture(`0`, None, hir)?;
287	let mat = self.add(State::Match)?;
288	self.patch(compiled.end, mat)?;
289	self.nfa.borrow_mut().start = compiled.start;
290	Ok(self.nfa.into_inner())
291	}
292
293	fn c(&self, hir: &Hir) -> Result<ThompsonRef, Error> {
294	match *hir.kind() {
295	HirKind::Empty => self.c_empty(),
296	HirKind::Char(ch) => self.c_char(ch),
297	HirKind::Class(ref class) => self.c_class(class),
298	HirKind::Look(ref look) => self.c_look(look),
299	HirKind::Repetition(ref rep) => self.c_repetition(rep),
300	HirKind::Capture(ref cap) => {
301	self.c_capture(cap.index, cap.name.as_deref(), &cap.sub)
302	}
303	HirKind::Concat(ref subs) => {
304	self.c_concat(subs.iter().map(\|s\| self.c(s)))
305	}
306	HirKind::Alternation(ref subs) => {
307	self.c_alternation(subs.iter().map(\|s\| self.c(s)))
308	}
309	}
310	}
311
312	/// Compile a "fail" state that can never be transitioned out of.
313	fn c_fail(&self) -> Result<ThompsonRef, Error> {
314	let id = self.add(State::Fail)?;
315	Ok(ThompsonRef { start: id, end: id })
316	}
317
318	/// Compile an "empty" state with one unconditional epsilon transition.
319	///
320	/// Both the `start` and `end` locations point to the state created.
321	/// Callers will likely want to keep the `start`, but patch the `end` to
322	/// point to some other state.
323	fn c_empty(&self) -> Result<ThompsonRef, Error> {
324	let id = self.add_empty()?;
325	Ok(ThompsonRef { start: id, end: id })
326	}
327
328	/// Compile the given literal char to an NFA.
329	fn c_char(&self, ch: char) -> Result<ThompsonRef, Error> {
330	let id = self.add(State::Char { target: `0`, ch })?;
331	Ok(ThompsonRef { start: id, end: id })
332	}
333
334	/// Compile the given character class into an NFA.
335	///
336	/// If the class is empty, then this compiles to a `Fail` state.
337	fn c_class(&self, class: &hir::Class) -> Result<ThompsonRef, Error> {
338	let id = if class.ranges.is_empty() {
339	// Technically using an explicit fail state probably isn't
340	// necessary. Because if you try to match against an empty Ranges,
341	// then it should turn up with nothing regardless of input, and
342	// thus "acts" like a Fail state. But it's better to be more
343	// explicit, and there's no real cost to doing so.
344	self.add(State::Fail)
345	} else {
346	let ranges =
347	class.ranges.iter().map(\|r\| (r.start, r.end)).collect();
348	self.add(State::Ranges { target: `0`, ranges })
349	}?;
350	Ok(ThompsonRef { start: id, end: id })
351	}
352
353	/// Compile the given HIR look-around assertion to an NFA look-around
354	/// assertion.
355	fn c_look(&self, look: &hir::Look) -> Result<ThompsonRef, Error> {
356	let id = self.add(State::Goto { target: `0`, look: Some(*look) })?;
357	Ok(ThompsonRef { start: id, end: id })
358	}
359
360	/// Compile the given repetition expression. This handles all types of
361	/// repetitions and greediness.
362	fn c_repetition(
363	&self,
364	rep: &hir::Repetition,
365	) -> Result<ThompsonRef, Error> {
366	match (rep.min, rep.max) {
367	(`0`, Some(`1`)) => self.c_zero_or_one(&rep.sub, rep.greedy),
368	(min, None) => self.c_at_least(&rep.sub, rep.greedy, min),
369	(min, Some(max)) if min == max => self.c_exactly(&rep.sub, min),
370	(min, Some(max)) => self.c_bounded(&rep.sub, rep.greedy, min, max),
371	}
372	}
373
374	/// Compile the given expression such that it matches at least `min` times,
375	/// but no more than `max` times.
376	///
377	/// When `greedy` is true, then the preference is for the expression to
378	/// match as much as possible. Otherwise, it will match as little as
379	/// possible.
380	fn c_bounded(
381	&self,
382	hir: &Hir,
383	greedy: bool,
384	min: u32,
385	max: u32,
386	) -> Result<ThompsonRef, Error> {
387	let prefix = self.c_exactly(hir, min)?;
388	if min == max {
389	return Ok(prefix);
390	}
391
392	// It is tempting here to compile the rest here as a concatenation
393	// of zero-or-one matches. i.e., for `a{2,5}`, compile it as if it
394	// were `aaa?a?a?`. The problem here is that it leads to this program:
395	//
396	// >000000: 61 => 01
397	// 000001: 61 => 02
398	// 000002: union(03, 04)
399	// 000003: 61 => 04
400	// 000004: union(05, 06)
401	// 000005: 61 => 06
402	// 000006: union(07, 08)
403	// 000007: 61 => 08
404	// 000008: MATCH
405	//
406	// And effectively, once you hit state 2, the epsilon closure will
407	// include states 3, 5, 6, 7 and 8, which is quite a bit. It is better
408	// to instead compile it like so:
409	//
410	// >000000: 61 => 01
411	// 000001: 61 => 02
412	// 000002: union(03, 08)
413	// 000003: 61 => 04
414	// 000004: union(05, 08)
415	// 000005: 61 => 06
416	// 000006: union(07, 08)
417	// 000007: 61 => 08
418	// 000008: MATCH
419	//
420	// So that the epsilon closure of state 2 is now just 3 and 8.
421	let empty = self.add_empty()?;
422	let mut prev_end = prefix.end;
423	for _ in min..max {
424	let splits =
425	self.add(State::Splits { targets: vec![], reverse: !greedy })?;
426	let compiled = self.c(hir)?;
427	self.patch(prev_end, splits)?;
428	self.patch(splits, compiled.start)?;
429	self.patch(splits, empty)?;
430	prev_end = compiled.end;
431	}
432	self.patch(prev_end, empty)?;
433	Ok(ThompsonRef { start: prefix.start, end: empty })
434	}
435
436	/// Compile the given expression such that it may be matched `n` or more
437	/// times, where `n` can be any integer. (Although a particularly large
438	/// integer is likely to run afoul of any configured size limits.)
439	///
440	/// When `greedy` is true, then the preference is for the expression to
441	/// match as much as possible. Otherwise, it will match as little as
442	/// possible.
443	fn c_at_least(
444	&self,
445	hir: &Hir,
446	greedy: bool,
447	n: u32,
448	) -> Result<ThompsonRef, Error> {
449	if n == `0` {
450	// When the expression cannot match the empty string, then we
451	// can get away with something much simpler: just one 'alt'
452	// instruction that optionally repeats itself. But if the expr
453	// can match the empty string... see below.
454	if !hir.is_match_empty() {
455	let splits = self.add(State::Splits {
456	targets: vec![],
457	reverse: !greedy,
458	})?;
459	let compiled = self.c(hir)?;
460	self.patch(splits, compiled.start)?;
461	self.patch(compiled.end, splits)?;
462	return Ok(ThompsonRef { start: splits, end: splits });
463	}
464
465	// What's going on here? Shouldn't x be simpler than this? It*
466	// turns out that when implementing leftmost-first (Perl-like)
467	// match semantics, x results in an incorrect preference order*
468	// when computing the transitive closure of states if and only if
469	// 'x' can match the empty string. So instead, we compile x as*
470	// (x+)?, which preserves the correct preference order.
471	//
472	// See: https://github.com/rust-lang/regex/issues/779
473	let compiled = self.c(hir)?;
474	let plus =
475	self.add(State::Splits { targets: vec![], reverse: !greedy })?;
476	self.patch(compiled.end, plus)?;
477	self.patch(plus, compiled.start)?;
478
479	let question =
480	self.add(State::Splits { targets: vec![], reverse: !greedy })?;
481	let empty = self.add_empty()?;
482	self.patch(question, compiled.start)?;
483	self.patch(question, empty)?;
484	self.patch(plus, empty)?;
485	Ok(ThompsonRef { start: question, end: empty })
486	} else if n == `1` {
487	let compiled = self.c(hir)?;
488	let splits =
489	self.add(State::Splits { targets: vec![], reverse: !greedy })?;
490	self.patch(compiled.end, splits)?;
491	self.patch(splits, compiled.start)?;
492	Ok(ThompsonRef { start: compiled.start, end: splits })
493	} else {
494	let prefix = self.c_exactly(hir, n - `1`)?;
495	let last = self.c(hir)?;
496	let splits =
497	self.add(State::Splits { targets: vec![], reverse: !greedy })?;
498	self.patch(prefix.end, last.start)?;
499	self.patch(last.end, splits)?;
500	self.patch(splits, last.start)?;
501	Ok(ThompsonRef { start: prefix.start, end: splits })
502	}
503	}
504
505	/// Compile the given expression such that it may be matched zero or one
506	/// times.
507	///
508	/// When `greedy` is true, then the preference is for the expression to
509	/// match as much as possible. Otherwise, it will match as little as
510	/// possible.
511	fn c_zero_or_one(
512	&self,
513	hir: &Hir,
514	greedy: bool,
515	) -> Result<ThompsonRef, Error> {
516	let splits =
517	self.add(State::Splits { targets: vec![], reverse: !greedy })?;
518	let compiled = self.c(hir)?;
519	let empty = self.add_empty()?;
520	self.patch(splits, compiled.start)?;
521	self.patch(splits, empty)?;
522	self.patch(compiled.end, empty)?;
523	Ok(ThompsonRef { start: splits, end: empty })
524	}
525
526	/// Compile the given HIR expression exactly `n` times.
527	fn c_exactly(&self, hir: &Hir, n: u32) -> Result<ThompsonRef, Error> {
528	self.c_concat((`0`..n).map(\|_\| self.c(hir)))
529	}
530
531	/// Compile the given expression and insert capturing states at the
532	/// beginning and end of it. The slot for the capture states is computed
533	/// from the index.
534	fn c_capture(
535	&self,
536	index: u32,
537	name: Option<&str>,
538	hir: &Hir,
539	) -> Result<ThompsonRef, Error> {
540	// For discontiguous indices, push placeholders for earlier capture
541	// groups that weren't explicitly added. This can happen, for example,
542	// with patterns like '(a){0}(a)' where '(a){0}' is completely removed
543	// from the pattern.
544	let existing_groups_len = self.nfa.borrow().cap_index_to_name.len();
545	for _ in `0`..(index.as_usize().saturating_sub(existing_groups_len)) {
546	self.nfa.borrow_mut().cap_index_to_name.push(None);
547	}
548	if index.as_usize() >= existing_groups_len {
549	if let Some(name) = name {
550	let name = Arc::from(name);
551	let mut nfa = self.nfa.borrow_mut();
552	nfa.cap_name_to_index.insert(Arc::clone(&name), index);
553	nfa.cap_index_to_name.push(Some(Arc::clone(&name)));
554	// This is an approximation.
555	nfa.memory_extra += name.len() + size_of::<u32>();
556	} else {
557	self.nfa.borrow_mut().cap_index_to_name.push(None);
558	}
559	}
560
561	let Some(slot) = index.checked_mul(`2`) else {
562	return Err(Error::new("capture group slots exhausted"));
563	};
564	let start = self.add(State::Capture { target: `0`, slot })?;
565	let inner = self.c(hir)?;
566	let Some(slot) = slot.checked_add(`1`) else {
567	return Err(Error::new("capture group slots exhausted"));
568	};
569	let end = self.add(State::Capture { target: `0`, slot })?;
570	self.patch(start, inner.start)?;
571	self.patch(inner.end, end)?;
572
573	Ok(ThompsonRef { start, end })
574	}
575
576	/// Compile a concatenation of the sub-expressions yielded by the given
577	/// iterator. If the iterator yields no elements, then this compiles down
578	/// to an "empty" state that always matches.
579	fn c_concat<I>(&self, mut it: I) -> Result<ThompsonRef, Error>
580	where
581	I: Iterator<Item = Result<ThompsonRef, Error>>,
582	{
583	let ThompsonRef { start, mut end } = match it.next() {
584	Some(result) => result?,
585	None => return self.c_empty(),
586	};
587	for result in it {
588	let compiled = result?;
589	self.patch(end, compiled.start)?;
590	end = compiled.end;
591	}
592	Ok(ThompsonRef { start, end })
593	}
594
595	/// Compile an alternation, where each element yielded by the given
596	/// iterator represents an item in the alternation. If the iterator yields
597	/// no elements, then this compiles down to a "fail" state.
598	///
599	/// In an alternation, expressions appearing earlier are "preferred" at
600	/// match time over expressions appearing later. (This is currently always
601	/// true, as this crate only supports leftmost-first semantics.)
602	fn c_alternation<I>(&self, mut it: I) -> Result<ThompsonRef, Error>
603	where
604	I: Iterator<Item = Result<ThompsonRef, Error>>,
605	{
606	let first = match it.next() {
607	None => return self.c_fail(),
608	Some(result) => result?,
609	};
610	let second = match it.next() {
611	None => return Ok(first),
612	Some(result) => result?,
613	};
614
615	let splits =
616	self.add(State::Splits { targets: vec![], reverse: `false` })?;
617	let end = self.add_empty()?;
618	self.patch(splits, first.start)?;
619	self.patch(first.end, end)?;
620	self.patch(splits, second.start)?;
621	self.patch(second.end, end)?;
622	for result in it {
623	let compiled = result?;
624	self.patch(splits, compiled.start)?;
625	self.patch(compiled.end, end)?;
626	}
627	Ok(ThompsonRef { start: splits, end })
628	}
629
630	/// A convenience routine for adding an empty state, also known as an
631	/// unconditional epsilon transition. These are quite useful for making
632	/// NFA construction simpler.
633	///
634	/// (In the regex crate, we do a second pass to remove these, but don't
635	/// bother with that here.)
636	fn add_empty(&self) -> Result<StateID, Error> {
637	self.add(State::Goto { target: `0`, look: None })
638	}
639
640	/// The common implementation of "add a state." It handles the common
641	/// error cases of state ID exhausting (by owning state ID allocation) and
642	/// whether the size limit has been exceeded.
643	fn add(&self, state: State) -> Result<StateID, Error> {
644	let id = u32::try_from(self.nfa.borrow().states.len())
645	.map_err(\|_\| Error::new("exhausted state IDs, too many states"))?;
646	self.nfa.borrow_mut().memory_extra += state.memory_usage();
647	self.nfa.borrow_mut().states.push(state);
648	self.check_size_limit()?;
649	Ok(id)
650	}
651
652	/// Add a transition from one state to another.
653	///
654	/// This routine is called "patch" since it is very common to add the
655	/// states you want, typically with "dummy" state ID transitions, and then
656	/// "patch" in the real state IDs later. This is because you don't always
657	/// know all of the necessary state IDs to add because they might not
658	/// exist yet.
659	///
660	/// # Errors
661	///
662	/// This may error if patching leads to an increase in heap usage beyond
663	/// the configured size limit. Heap usage only grows when patching adds a
664	/// new transition (as in the case of a "splits" state).
665	fn patch(&self, from: StateID, to: StateID) -> Result<(), Error> {
666	let mut new_memory_extra = self.nfa.borrow().memory_extra;
667	match self.nfa.borrow_mut().states[from.as_usize()] {
668	State::Char { ref mut target, .. } => {
669	*target = to;
670	}
671	State::Ranges { ref mut target, .. } => {
672	*target = to;
673	}
674	State::Splits { ref mut targets, .. } => {
675	targets.push(to);
676	new_memory_extra += size_of::<StateID>();
677	}
678	State::Goto { ref mut target, .. } => {
679	*target = to;
680	}
681	State::Capture { ref mut target, .. } => {
682	*target = to;
683	}
684	State::Fail \| State::Match => {}
685	}
686	if new_memory_extra != self.nfa.borrow().memory_extra {
687	self.nfa.borrow_mut().memory_extra = new_memory_extra;
688	self.check_size_limit()?;
689	}
690	Ok(())
691	}
692
693	/// Checks that the current heap memory usage of the NFA being compiled
694	/// doesn't exceed the configured size limit. If it does, an error is
695	/// returned.
696	fn check_size_limit(&self) -> Result<(), Error> {
697	if let Some(limit) = self.config.size_limit {
698	if self.nfa.borrow().memory_usage() > limit {
699	return Err(Error::new("compiled regex exceeded size limit"));
700	}
701	}
702	Ok(())
703	}
704	}
705
706	/// A value that represents the result of compiling a sub-expression of a
707	/// regex's HIR. Specifically, this represents a sub-graph of the NFA that
708	/// has an initial state at `start` and a final state at `end`.
709	#[derive(Clone, Copy, Debug)]
710	struct ThompsonRef {
711	start: StateID,
712	end: StateID,
713	}
714