1use core::{cell::RefCell, mem::size_of};
2
3use alloc::{string::String, sync::Arc, vec, vec::Vec};
4
5use crate::{
6 error::Error,
7 hir::{self, Hir, HirKind},
8 int::U32,
9};
10
11pub(crate) type StateID = u32;
12
13#[derive(Clone, Copy, Debug)]
14pub(crate) struct Config {
15 pub(crate) size_limit: Option<usize>,
16}
17
18impl Default for Config {
19 fn default() -> Config {
20 Config { size_limit: Some(10 * (1 << 20)) }
21 }
22}
23
24#[derive(Clone)]
25pub(crate) struct NFA {
26 /// The pattern string this NFA was generated from.
27 ///
28 /// We put it here for lack of a better place to put it. ¯\_(ツ)_/¯
29 pattern: String,
30 /// The states that make up this NFA.
31 states: Vec<State>,
32 /// The ID of the start state.
33 start: StateID,
34 /// Whether this NFA can only match at the beginning of a haystack.
35 is_start_anchored: bool,
36 /// Whether this NFA can match the empty string.
37 is_match_empty: bool,
38 /// If every match has the same number of matching capture groups, then
39 /// this corresponds to the number of groups.
40 static_explicit_captures_len: Option<usize>,
41 /// A map from capture group name to its corresponding index.
42 cap_name_to_index: CaptureNameMap,
43 /// A map from capture group index to the corresponding name, if one
44 /// exists.
45 cap_index_to_name: Vec<Option<Arc<str>>>,
46 /// Heap memory used indirectly by NFA states and other things (like the
47 /// various capturing group representations above). Since each state
48 /// might use a different amount of heap, we need to keep track of this
49 /// incrementally.
50 memory_extra: usize,
51}
52
53impl NFA {
54 /// Creates a new NFA from the given configuration and HIR.
55 pub(crate) fn new(
56 config: Config,
57 pattern: String,
58 hir: &Hir,
59 ) -> Result<NFA, Error> {
60 Compiler::new(config, pattern).compile(hir)
61 }
62
63 /// Returns the pattern string used to construct this NFA.
64 pub(crate) fn pattern(&self) -> &str {
65 &self.pattern
66 }
67
68 /// Returns the state corresponding to the given ID.
69 ///
70 /// # Panics
71 ///
72 /// If the ID does not refer to a valid state, then this panics.
73 pub(crate) fn state(&self, id: StateID) -> &State {
74 &self.states[id.as_usize()]
75 }
76
77 /// Returns the total number of states in this NFA.
78 pub(crate) fn len(&self) -> usize {
79 self.states.len()
80 }
81
82 /// Returns the ID of the starting state for this NFA.
83 pub(crate) fn start(&self) -> StateID {
84 self.start
85 }
86
87 /// Returns the capture group index for the corresponding named group.
88 /// If no such group with the given name exists, then `None` is returned.
89 pub(crate) fn to_index(&self, name: &str) -> Option<usize> {
90 self.cap_name_to_index.get(name).cloned().map(|i| i.as_usize())
91 }
92
93 /*
94 /// Returns the capture group name for the corresponding index.
95 /// If no such group with the given index, then `None` is returned.
96 pub(crate) fn to_name(&self, index: usize) -> Option<&str> {
97 self.cap_index_to_name.get(index)?.as_deref()
98 }
99 */
100
101 /// Returns an iterator over all of the capture groups, along with their
102 /// names if they exist, in this NFA.
103 pub(crate) fn capture_names(&self) -> CaptureNames<'_> {
104 CaptureNames { it: self.cap_index_to_name.iter() }
105 }
106
107 /// Returns the total number of capture groups, including the first and
108 /// implicit group, in this NFA.
109 pub(crate) fn group_len(&self) -> usize {
110 self.cap_index_to_name.len()
111 }
112
113 /// Returns true if and only if this NFA can only match at the beginning of
114 /// a haystack.
115 pub(crate) fn is_start_anchored(&self) -> bool {
116 self.is_start_anchored
117 }
118
119 /// If the pattern always reports the same number of matching capture groups
120 /// for every match, then this returns the number of those groups. This
121 /// doesn't include the implicit group found in every pattern.
122 pub(crate) fn static_explicit_captures_len(&self) -> Option<usize> {
123 self.static_explicit_captures_len
124 }
125
126 /// Returns the heap memory usage, in bytes, used by this NFA.
127 fn memory_usage(&self) -> usize {
128 (self.states.len() * size_of::<State>())
129 + (self.cap_index_to_name.len() * size_of::<Option<Arc<str>>>())
130 + self.memory_extra
131 }
132}
133
134impl core::fmt::Debug for NFA {
135 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
136 writeln!(f, "NFA(")?;
137 writeln!(f, "pattern: {}", self.pattern)?;
138 for (sid: usize, state: &State) in self.states.iter().enumerate() {
139 writeln!(f, "{:07?}: {:?}", sid, state)?;
140 }
141 writeln!(f, ")")?;
142 Ok(())
143 }
144}
145
146/// An iterator over all capture groups in an NFA.
147///
148/// If a particular group has a name, then it is yielded. Otherwise, `None`
149/// is yielded.
150#[derive(Clone, Debug)]
151pub(crate) struct CaptureNames<'a> {
152 it: core::slice::Iter<'a, Option<Arc<str>>>,
153}
154
155impl<'a> Iterator for CaptureNames<'a> {
156 type Item = Option<&'a str>;
157
158 fn next(&mut self) -> Option<Option<&'a str>> {
159 self.it.next().map(|n: &'a Option>| n.as_deref())
160 }
161}
162
163#[derive(Clone, Eq, PartialEq)]
164pub(crate) enum State {
165 Char { target: StateID, ch: char },
166 Ranges { target: StateID, ranges: Vec<(char, char)> },
167 Splits { targets: Vec<StateID>, reverse: bool },
168 Goto { target: StateID, look: Option<hir::Look> },
169 Capture { target: StateID, slot: u32 },
170 Fail,
171 Match,
172}
173
174impl State {
175 /// Returns the heap memory usage of this NFA state in bytes.
176 fn memory_usage(&self) -> usize {
177 match *self {
178 State::Char { .. }
179 | State::Goto { .. }
180 | State::Capture { .. }
181 | State::Fail { .. }
182 | State::Match => 0,
183 State::Splits { ref targets, .. } => {
184 targets.len() * size_of::<StateID>()
185 }
186 State::Ranges { ref ranges, .. } => {
187 ranges.len() * size_of::<(char, char)>()
188 }
189 }
190 }
191
192 /// Returns an iterator over the given split targets. The order of the
193 /// iterator yields elements in reverse when `reverse` is true.
194 pub(crate) fn iter_splits<'a>(
195 splits: &'a [StateID],
196 reverse: bool,
197 ) -> impl Iterator<Item = StateID> + 'a {
198 let mut it = splits.iter();
199 core::iter::from_fn(move || {
200 if reverse { it.next_back() } else { it.next() }.copied()
201 })
202 }
203}
204
205impl core::fmt::Debug for State {
206 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
207 match *self {
208 State::Char { target, ch } => {
209 write!(f, "{:?} => {:?}", ch, target)
210 }
211 State::Ranges { target, ref ranges } => {
212 for (i, &(start, end)) in ranges.iter().enumerate() {
213 if i > 0 {
214 write!(f, ", ")?;
215 }
216 write!(f, "{:?}-{:?} => {:?}", start, end, target)?;
217 }
218 Ok(())
219 }
220 State::Splits { ref targets, reverse } => {
221 write!(f, "splits(")?;
222 for (i, sid) in
223 State::iter_splits(targets, reverse).enumerate()
224 {
225 if i > 0 {
226 write!(f, ", ")?;
227 }
228 write!(f, "{:?}", sid)?;
229 }
230 write!(f, ")")
231 }
232 State::Goto { target, look: None } => {
233 write!(f, "goto({:?})", target)
234 }
235 State::Goto { target, look: Some(look) } => {
236 write!(f, "{:?} => {:?}", look, target)
237 }
238 State::Capture { target, slot } => {
239 write!(f, "capture(slot={:?}) => {:?}", slot, target,)
240 }
241 State::Fail => write!(f, "FAIL"),
242 State::Match => {
243 write!(f, "MATCH")
244 }
245 }
246 }
247}
248
249/// A map from capture group name to its corresponding capture group index.
250///
251/// We define a type alias here so that we can transparently use a `HashMap`
252/// whenever it's available. We do so presumably because it's faster, although
253/// there are no benchmarks verifying this.
254#[cfg(feature = "std")]
255type CaptureNameMap = std::collections::HashMap<Arc<str>, u32>;
256#[cfg(not(feature = "std"))]
257type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, u32>;
258
259#[derive(Debug)]
260struct Compiler {
261 config: Config,
262 nfa: RefCell<NFA>,
263}
264
265impl Compiler {
266 fn new(config: Config, pattern: String) -> Compiler {
267 let nfa = RefCell::new(NFA {
268 pattern,
269 states: vec![],
270 start: 0,
271 is_start_anchored: false,
272 is_match_empty: false,
273 static_explicit_captures_len: None,
274 cap_name_to_index: CaptureNameMap::default(),
275 cap_index_to_name: vec![],
276 memory_extra: 0,
277 });
278 Compiler { config, nfa }
279 }
280
281 fn compile(self, hir: &Hir) -> Result<NFA, Error> {
282 self.nfa.borrow_mut().is_start_anchored = hir.is_start_anchored();
283 self.nfa.borrow_mut().is_match_empty = hir.is_match_empty();
284 self.nfa.borrow_mut().static_explicit_captures_len =
285 hir.static_explicit_captures_len();
286 let compiled = self.c_capture(0, None, hir)?;
287 let mat = self.add(State::Match)?;
288 self.patch(compiled.end, mat)?;
289 self.nfa.borrow_mut().start = compiled.start;
290 Ok(self.nfa.into_inner())
291 }
292
293 fn c(&self, hir: &Hir) -> Result<ThompsonRef, Error> {
294 match *hir.kind() {
295 HirKind::Empty => self.c_empty(),
296 HirKind::Char(ch) => self.c_char(ch),
297 HirKind::Class(ref class) => self.c_class(class),
298 HirKind::Look(ref look) => self.c_look(look),
299 HirKind::Repetition(ref rep) => self.c_repetition(rep),
300 HirKind::Capture(ref cap) => {
301 self.c_capture(cap.index, cap.name.as_deref(), &cap.sub)
302 }
303 HirKind::Concat(ref subs) => {
304 self.c_concat(subs.iter().map(|s| self.c(s)))
305 }
306 HirKind::Alternation(ref subs) => {
307 self.c_alternation(subs.iter().map(|s| self.c(s)))
308 }
309 }
310 }
311
312 /// Compile a "fail" state that can never be transitioned out of.
313 fn c_fail(&self) -> Result<ThompsonRef, Error> {
314 let id = self.add(State::Fail)?;
315 Ok(ThompsonRef { start: id, end: id })
316 }
317
318 /// Compile an "empty" state with one unconditional epsilon transition.
319 ///
320 /// Both the `start` and `end` locations point to the state created.
321 /// Callers will likely want to keep the `start`, but patch the `end` to
322 /// point to some other state.
323 fn c_empty(&self) -> Result<ThompsonRef, Error> {
324 let id = self.add_empty()?;
325 Ok(ThompsonRef { start: id, end: id })
326 }
327
328 /// Compile the given literal char to an NFA.
329 fn c_char(&self, ch: char) -> Result<ThompsonRef, Error> {
330 let id = self.add(State::Char { target: 0, ch })?;
331 Ok(ThompsonRef { start: id, end: id })
332 }
333
334 /// Compile the given character class into an NFA.
335 ///
336 /// If the class is empty, then this compiles to a `Fail` state.
337 fn c_class(&self, class: &hir::Class) -> Result<ThompsonRef, Error> {
338 let id = if class.ranges.is_empty() {
339 // Technically using an explicit fail state probably isn't
340 // necessary. Because if you try to match against an empty Ranges,
341 // then it should turn up with nothing regardless of input, and
342 // thus "acts" like a Fail state. But it's better to be more
343 // explicit, and there's no real cost to doing so.
344 self.add(State::Fail)
345 } else {
346 let ranges =
347 class.ranges.iter().map(|r| (r.start, r.end)).collect();
348 self.add(State::Ranges { target: 0, ranges })
349 }?;
350 Ok(ThompsonRef { start: id, end: id })
351 }
352
353 /// Compile the given HIR look-around assertion to an NFA look-around
354 /// assertion.
355 fn c_look(&self, look: &hir::Look) -> Result<ThompsonRef, Error> {
356 let id = self.add(State::Goto { target: 0, look: Some(*look) })?;
357 Ok(ThompsonRef { start: id, end: id })
358 }
359
360 /// Compile the given repetition expression. This handles all types of
361 /// repetitions and greediness.
362 fn c_repetition(
363 &self,
364 rep: &hir::Repetition,
365 ) -> Result<ThompsonRef, Error> {
366 match (rep.min, rep.max) {
367 (0, Some(1)) => self.c_zero_or_one(&rep.sub, rep.greedy),
368 (min, None) => self.c_at_least(&rep.sub, rep.greedy, min),
369 (min, Some(max)) if min == max => self.c_exactly(&rep.sub, min),
370 (min, Some(max)) => self.c_bounded(&rep.sub, rep.greedy, min, max),
371 }
372 }
373
374 /// Compile the given expression such that it matches at least `min` times,
375 /// but no more than `max` times.
376 ///
377 /// When `greedy` is true, then the preference is for the expression to
378 /// match as much as possible. Otherwise, it will match as little as
379 /// possible.
380 fn c_bounded(
381 &self,
382 hir: &Hir,
383 greedy: bool,
384 min: u32,
385 max: u32,
386 ) -> Result<ThompsonRef, Error> {
387 let prefix = self.c_exactly(hir, min)?;
388 if min == max {
389 return Ok(prefix);
390 }
391
392 // It is tempting here to compile the rest here as a concatenation
393 // of zero-or-one matches. i.e., for `a{2,5}`, compile it as if it
394 // were `aaa?a?a?`. The problem here is that it leads to this program:
395 //
396 // >000000: 61 => 01
397 // 000001: 61 => 02
398 // 000002: union(03, 04)
399 // 000003: 61 => 04
400 // 000004: union(05, 06)
401 // 000005: 61 => 06
402 // 000006: union(07, 08)
403 // 000007: 61 => 08
404 // 000008: MATCH
405 //
406 // And effectively, once you hit state 2, the epsilon closure will
407 // include states 3, 5, 6, 7 and 8, which is quite a bit. It is better
408 // to instead compile it like so:
409 //
410 // >000000: 61 => 01
411 // 000001: 61 => 02
412 // 000002: union(03, 08)
413 // 000003: 61 => 04
414 // 000004: union(05, 08)
415 // 000005: 61 => 06
416 // 000006: union(07, 08)
417 // 000007: 61 => 08
418 // 000008: MATCH
419 //
420 // So that the epsilon closure of state 2 is now just 3 and 8.
421 let empty = self.add_empty()?;
422 let mut prev_end = prefix.end;
423 for _ in min..max {
424 let splits =
425 self.add(State::Splits { targets: vec![], reverse: !greedy })?;
426 let compiled = self.c(hir)?;
427 self.patch(prev_end, splits)?;
428 self.patch(splits, compiled.start)?;
429 self.patch(splits, empty)?;
430 prev_end = compiled.end;
431 }
432 self.patch(prev_end, empty)?;
433 Ok(ThompsonRef { start: prefix.start, end: empty })
434 }
435
436 /// Compile the given expression such that it may be matched `n` or more
437 /// times, where `n` can be any integer. (Although a particularly large
438 /// integer is likely to run afoul of any configured size limits.)
439 ///
440 /// When `greedy` is true, then the preference is for the expression to
441 /// match as much as possible. Otherwise, it will match as little as
442 /// possible.
443 fn c_at_least(
444 &self,
445 hir: &Hir,
446 greedy: bool,
447 n: u32,
448 ) -> Result<ThompsonRef, Error> {
449 if n == 0 {
450 // When the expression cannot match the empty string, then we
451 // can get away with something much simpler: just one 'alt'
452 // instruction that optionally repeats itself. But if the expr
453 // can match the empty string... see below.
454 if !hir.is_match_empty() {
455 let splits = self.add(State::Splits {
456 targets: vec![],
457 reverse: !greedy,
458 })?;
459 let compiled = self.c(hir)?;
460 self.patch(splits, compiled.start)?;
461 self.patch(compiled.end, splits)?;
462 return Ok(ThompsonRef { start: splits, end: splits });
463 }
464
465 // What's going on here? Shouldn't x* be simpler than this? It
466 // turns out that when implementing leftmost-first (Perl-like)
467 // match semantics, x* results in an incorrect preference order
468 // when computing the transitive closure of states if and only if
469 // 'x' can match the empty string. So instead, we compile x* as
470 // (x+)?, which preserves the correct preference order.
471 //
472 // See: https://github.com/rust-lang/regex/issues/779
473 let compiled = self.c(hir)?;
474 let plus =
475 self.add(State::Splits { targets: vec![], reverse: !greedy })?;
476 self.patch(compiled.end, plus)?;
477 self.patch(plus, compiled.start)?;
478
479 let question =
480 self.add(State::Splits { targets: vec![], reverse: !greedy })?;
481 let empty = self.add_empty()?;
482 self.patch(question, compiled.start)?;
483 self.patch(question, empty)?;
484 self.patch(plus, empty)?;
485 Ok(ThompsonRef { start: question, end: empty })
486 } else if n == 1 {
487 let compiled = self.c(hir)?;
488 let splits =
489 self.add(State::Splits { targets: vec![], reverse: !greedy })?;
490 self.patch(compiled.end, splits)?;
491 self.patch(splits, compiled.start)?;
492 Ok(ThompsonRef { start: compiled.start, end: splits })
493 } else {
494 let prefix = self.c_exactly(hir, n - 1)?;
495 let last = self.c(hir)?;
496 let splits =
497 self.add(State::Splits { targets: vec![], reverse: !greedy })?;
498 self.patch(prefix.end, last.start)?;
499 self.patch(last.end, splits)?;
500 self.patch(splits, last.start)?;
501 Ok(ThompsonRef { start: prefix.start, end: splits })
502 }
503 }
504
505 /// Compile the given expression such that it may be matched zero or one
506 /// times.
507 ///
508 /// When `greedy` is true, then the preference is for the expression to
509 /// match as much as possible. Otherwise, it will match as little as
510 /// possible.
511 fn c_zero_or_one(
512 &self,
513 hir: &Hir,
514 greedy: bool,
515 ) -> Result<ThompsonRef, Error> {
516 let splits =
517 self.add(State::Splits { targets: vec![], reverse: !greedy })?;
518 let compiled = self.c(hir)?;
519 let empty = self.add_empty()?;
520 self.patch(splits, compiled.start)?;
521 self.patch(splits, empty)?;
522 self.patch(compiled.end, empty)?;
523 Ok(ThompsonRef { start: splits, end: empty })
524 }
525
526 /// Compile the given HIR expression exactly `n` times.
527 fn c_exactly(&self, hir: &Hir, n: u32) -> Result<ThompsonRef, Error> {
528 self.c_concat((0..n).map(|_| self.c(hir)))
529 }
530
531 /// Compile the given expression and insert capturing states at the
532 /// beginning and end of it. The slot for the capture states is computed
533 /// from the index.
534 fn c_capture(
535 &self,
536 index: u32,
537 name: Option<&str>,
538 hir: &Hir,
539 ) -> Result<ThompsonRef, Error> {
540 // For discontiguous indices, push placeholders for earlier capture
541 // groups that weren't explicitly added. This can happen, for example,
542 // with patterns like '(a){0}(a)' where '(a){0}' is completely removed
543 // from the pattern.
544 let existing_groups_len = self.nfa.borrow().cap_index_to_name.len();
545 for _ in 0..(index.as_usize().saturating_sub(existing_groups_len)) {
546 self.nfa.borrow_mut().cap_index_to_name.push(None);
547 }
548 if index.as_usize() >= existing_groups_len {
549 if let Some(name) = name {
550 let name = Arc::from(name);
551 let mut nfa = self.nfa.borrow_mut();
552 nfa.cap_name_to_index.insert(Arc::clone(&name), index);
553 nfa.cap_index_to_name.push(Some(Arc::clone(&name)));
554 // This is an approximation.
555 nfa.memory_extra += name.len() + size_of::<u32>();
556 } else {
557 self.nfa.borrow_mut().cap_index_to_name.push(None);
558 }
559 }
560
561 let Some(slot) = index.checked_mul(2) else {
562 return Err(Error::new("capture group slots exhausted"));
563 };
564 let start = self.add(State::Capture { target: 0, slot })?;
565 let inner = self.c(hir)?;
566 let Some(slot) = slot.checked_add(1) else {
567 return Err(Error::new("capture group slots exhausted"));
568 };
569 let end = self.add(State::Capture { target: 0, slot })?;
570 self.patch(start, inner.start)?;
571 self.patch(inner.end, end)?;
572
573 Ok(ThompsonRef { start, end })
574 }
575
576 /// Compile a concatenation of the sub-expressions yielded by the given
577 /// iterator. If the iterator yields no elements, then this compiles down
578 /// to an "empty" state that always matches.
579 fn c_concat<I>(&self, mut it: I) -> Result<ThompsonRef, Error>
580 where
581 I: Iterator<Item = Result<ThompsonRef, Error>>,
582 {
583 let ThompsonRef { start, mut end } = match it.next() {
584 Some(result) => result?,
585 None => return self.c_empty(),
586 };
587 for result in it {
588 let compiled = result?;
589 self.patch(end, compiled.start)?;
590 end = compiled.end;
591 }
592 Ok(ThompsonRef { start, end })
593 }
594
595 /// Compile an alternation, where each element yielded by the given
596 /// iterator represents an item in the alternation. If the iterator yields
597 /// no elements, then this compiles down to a "fail" state.
598 ///
599 /// In an alternation, expressions appearing earlier are "preferred" at
600 /// match time over expressions appearing later. (This is currently always
601 /// true, as this crate only supports leftmost-first semantics.)
602 fn c_alternation<I>(&self, mut it: I) -> Result<ThompsonRef, Error>
603 where
604 I: Iterator<Item = Result<ThompsonRef, Error>>,
605 {
606 let first = match it.next() {
607 None => return self.c_fail(),
608 Some(result) => result?,
609 };
610 let second = match it.next() {
611 None => return Ok(first),
612 Some(result) => result?,
613 };
614
615 let splits =
616 self.add(State::Splits { targets: vec![], reverse: false })?;
617 let end = self.add_empty()?;
618 self.patch(splits, first.start)?;
619 self.patch(first.end, end)?;
620 self.patch(splits, second.start)?;
621 self.patch(second.end, end)?;
622 for result in it {
623 let compiled = result?;
624 self.patch(splits, compiled.start)?;
625 self.patch(compiled.end, end)?;
626 }
627 Ok(ThompsonRef { start: splits, end })
628 }
629
630 /// A convenience routine for adding an empty state, also known as an
631 /// unconditional epsilon transition. These are quite useful for making
632 /// NFA construction simpler.
633 ///
634 /// (In the regex crate, we do a second pass to remove these, but don't
635 /// bother with that here.)
636 fn add_empty(&self) -> Result<StateID, Error> {
637 self.add(State::Goto { target: 0, look: None })
638 }
639
640 /// The common implementation of "add a state." It handles the common
641 /// error cases of state ID exhausting (by owning state ID allocation) and
642 /// whether the size limit has been exceeded.
643 fn add(&self, state: State) -> Result<StateID, Error> {
644 let id = u32::try_from(self.nfa.borrow().states.len())
645 .map_err(|_| Error::new("exhausted state IDs, too many states"))?;
646 self.nfa.borrow_mut().memory_extra += state.memory_usage();
647 self.nfa.borrow_mut().states.push(state);
648 self.check_size_limit()?;
649 Ok(id)
650 }
651
652 /// Add a transition from one state to another.
653 ///
654 /// This routine is called "patch" since it is very common to add the
655 /// states you want, typically with "dummy" state ID transitions, and then
656 /// "patch" in the real state IDs later. This is because you don't always
657 /// know all of the necessary state IDs to add because they might not
658 /// exist yet.
659 ///
660 /// # Errors
661 ///
662 /// This may error if patching leads to an increase in heap usage beyond
663 /// the configured size limit. Heap usage only grows when patching adds a
664 /// new transition (as in the case of a "splits" state).
665 fn patch(&self, from: StateID, to: StateID) -> Result<(), Error> {
666 let mut new_memory_extra = self.nfa.borrow().memory_extra;
667 match self.nfa.borrow_mut().states[from.as_usize()] {
668 State::Char { ref mut target, .. } => {
669 *target = to;
670 }
671 State::Ranges { ref mut target, .. } => {
672 *target = to;
673 }
674 State::Splits { ref mut targets, .. } => {
675 targets.push(to);
676 new_memory_extra += size_of::<StateID>();
677 }
678 State::Goto { ref mut target, .. } => {
679 *target = to;
680 }
681 State::Capture { ref mut target, .. } => {
682 *target = to;
683 }
684 State::Fail | State::Match => {}
685 }
686 if new_memory_extra != self.nfa.borrow().memory_extra {
687 self.nfa.borrow_mut().memory_extra = new_memory_extra;
688 self.check_size_limit()?;
689 }
690 Ok(())
691 }
692
693 /// Checks that the current heap memory usage of the NFA being compiled
694 /// doesn't exceed the configured size limit. If it does, an error is
695 /// returned.
696 fn check_size_limit(&self) -> Result<(), Error> {
697 if let Some(limit) = self.config.size_limit {
698 if self.nfa.borrow().memory_usage() > limit {
699 return Err(Error::new("compiled regex exceeded size limit"));
700 }
701 }
702 Ok(())
703 }
704}
705
706/// A value that represents the result of compiling a sub-expression of a
707/// regex's HIR. Specifically, this represents a sub-graph of the NFA that
708/// has an initial state at `start` and a final state at `end`.
709#[derive(Clone, Copy, Debug)]
710struct ThompsonRef {
711 start: StateID,
712 end: StateID,
713}
714